diff --git a/core/api/core.api b/core/api/core.api index 0d9aa53c8e..f281641220 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -2691,7 +2691,7 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/ColumnDescri public abstract fun getPath ()Lorg/jetbrains/kotlinx/dataframe/columns/ColumnPath; public abstract fun getStd ()D public abstract fun getTop ()Ljava/lang/Object; - public abstract fun getType ()Lkotlin/reflect/KType; + public abstract fun getType ()Ljava/lang/String; public abstract fun getUnique ()I } @@ -2719,7 +2719,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/ColumnDescription_Extensi public static final fun ColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; public static final fun ColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; - public static final fun ColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Lkotlin/reflect/KType; + public static final fun ColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; public static final fun ColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun ColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/DataRow;)I public static final fun NullableColumnDescription_count (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; @@ -2745,7 +2745,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/ColumnDescription_Extensi public static final fun NullableColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_top (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Object; public static final fun NullableColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; - public static final fun NullableColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Lkotlin/reflect/KType; + public static final fun NullableColumnDescription_type (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/String; public static final fun NullableColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/ColumnsContainer;)Lorg/jetbrains/kotlinx/dataframe/DataColumn; public static final fun NullableColumnDescription_unique (Lorg/jetbrains/kotlinx/dataframe/DataRow;)Ljava/lang/Integer; } @@ -4146,6 +4146,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/DataColumnArithmeticsKt { } public final class org/jetbrains/kotlinx/dataframe/api/DataColumnTypeKt { + public static final fun isBigNumber (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isColumnGroup (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isComparable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isFrameColumn (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z @@ -4154,6 +4155,7 @@ public final class org/jetbrains/kotlinx/dataframe/api/DataColumnTypeKt { public static final fun isPrimitive (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z public static final fun isSubtypeOf (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lkotlin/reflect/KType;)Z public static final fun isValueColumn (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z + public static final fun valuesAreComparable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;)Z } public final class org/jetbrains/kotlinx/dataframe/api/DataFrameBuilder { @@ -10771,6 +10773,7 @@ public final class org/jetbrains/kotlinx/dataframe/math/BasicStats { public final class org/jetbrains/kotlinx/dataframe/math/MeanKt { public static final fun bigDecimalMean (Ljava/lang/Iterable;)D + public static final fun bigIntegerMean (Ljava/lang/Iterable;)D public static final fun byteMean (Ljava/lang/Iterable;)D public static final fun doubleMean (Ljava/lang/Iterable;Z)D public static synthetic fun doubleMean$default (Ljava/lang/Iterable;ZILjava/lang/Object;)D @@ -10794,6 +10797,8 @@ public final class org/jetbrains/kotlinx/dataframe/math/MedianKt { public final class org/jetbrains/kotlinx/dataframe/math/StdKt { public static final fun bigDecimalStd (Ljava/lang/Iterable;I)D public static synthetic fun bigDecimalStd$default (Ljava/lang/Iterable;IILjava/lang/Object;)D + public static final fun bigIntegerStd (Ljava/lang/Iterable;I)D + public static synthetic fun bigIntegerStd$default (Ljava/lang/Iterable;IILjava/lang/Object;)D public static final fun doubleStd (Ljava/lang/Iterable;ZI)D public static synthetic fun doubleStd$default (Ljava/lang/Iterable;ZIILjava/lang/Object;)D public static final fun floatStd (Ljava/lang/Iterable;ZI)D @@ -10808,6 +10813,7 @@ public final class org/jetbrains/kotlinx/dataframe/math/StdKt { public final class org/jetbrains/kotlinx/dataframe/math/StdMeanKt { public static final fun bigDecimalVarianceAndMean (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; + public static final fun bigIntegerVarianceAndMean (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; public static final fun doubleVarianceAndMean (Ljava/lang/Iterable;Z)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; public static synthetic fun doubleVarianceAndMean$default (Ljava/lang/Iterable;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; public static final fun floatVarianceAndMean (Ljava/lang/Iterable;Z)Lorg/jetbrains/kotlinx/dataframe/math/BasicStats; @@ -10818,8 +10824,10 @@ public final class org/jetbrains/kotlinx/dataframe/math/StdMeanKt { public final class org/jetbrains/kotlinx/dataframe/math/SumKt { public static final fun sum (Ljava/lang/Iterable;)Ljava/math/BigDecimal; + public static final fun sum (Ljava/lang/Iterable;)Ljava/math/BigInteger; public static final fun sum (Ljava/lang/Iterable;Lkotlin/reflect/KType;)Ljava/lang/Number; public static final fun sum (Lkotlin/sequences/Sequence;)Ljava/math/BigDecimal; + public static final fun sum (Lkotlin/sequences/Sequence;)Ljava/math/BigInteger; public static final fun sumNullableT (Ljava/lang/Iterable;Lkotlin/reflect/KType;)Ljava/lang/Number; public static final fun sumOf (Ljava/lang/Iterable;Lkotlin/reflect/KType;Lkotlin/jvm/functions/Function1;)Ljava/lang/Number; } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt index 2c7887ab3b..da75c4058f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/DataColumnType.kt @@ -11,6 +11,11 @@ import org.jetbrains.kotlinx.dataframe.impl.isNothing import org.jetbrains.kotlinx.dataframe.impl.projectTo import org.jetbrains.kotlinx.dataframe.type import org.jetbrains.kotlinx.dataframe.typeClass +import org.jetbrains.kotlinx.dataframe.util.IS_COMPARABLE +import org.jetbrains.kotlinx.dataframe.util.IS_COMPARABLE_REPLACE +import org.jetbrains.kotlinx.dataframe.util.IS_INTER_COMPARABLE_IMPORT +import java.math.BigDecimal +import java.math.BigInteger import kotlin.contracts.ExperimentalContracts import kotlin.contracts.contract import kotlin.reflect.KClass @@ -44,13 +49,29 @@ public inline fun AnyCol.isType(): Boolean = type() == typeOf() public fun AnyCol.isNumber(): Boolean = isSubtypeOf() +public fun AnyCol.isBigNumber(): Boolean = isSubtypeOf() || isSubtypeOf() + public fun AnyCol.isList(): Boolean = typeClass == List::class +/** @include [valuesAreComparable] */ +@Deprecated( + message = IS_COMPARABLE, + replaceWith = ReplaceWith(IS_COMPARABLE_REPLACE, IS_INTER_COMPARABLE_IMPORT), + level = DeprecationLevel.WARNING, +) +public fun AnyCol.isComparable(): Boolean = valuesAreComparable() + /** - * Returns `true` if [this] column is comparable, i.e. its type is a subtype of [Comparable] and its - * type argument is not [Nothing]. + * Returns `true` if [this] column is inter-comparable, i.e. + * its values can be compared with each other and thus ordered. + * + * If true, operations like [`min()`][AnyCol.min], [`max()`][AnyCol.max], [`median()`][AnyCol.median], etc. + * will work. + * + * Technically, this means the values' common type is a subtype of [Comparable] with + * the type argument not being [Nothing]. */ -public fun AnyCol.isComparable(): Boolean = +public fun AnyCol.valuesAreComparable(): Boolean = isSubtypeOf?>() && type().projectTo(Comparable::class).arguments[0].let { it != KTypeProjection.STAR && diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt index 9d8c7ff715..4779d07437 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -5,10 +5,13 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.impl.nullableNothingType import org.jetbrains.kotlinx.dataframe.math.cumSum import org.jetbrains.kotlinx.dataframe.math.defaultCumSumSkipNA import org.jetbrains.kotlinx.dataframe.typeClass import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KProperty import kotlin.reflect.typeOf @@ -17,20 +20,59 @@ import kotlin.reflect.typeOf public fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn = when (type()) { typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + + // TODO cumSum for Byte returns Int but is converted back to T: Byte, Issue #558 + typeOf() -> cast().cumSum().map { it.toByte() }.cast() + + // TODO cumSum for Short returns Int but is converted back to T: Short, Issue #558 + typeOf() -> cast().cumSum().map { it.toShort() }.cast() + typeOf() -> cast().cumSum(skipNA).cast() + + // TODO cumSum for Byte? returns Int? but is converted back to T: Byte?, Issue #558 + typeOf() -> cast().cumSum(skipNA).map { it?.toByte() }.cast() + + // TODO cumSum for Short? returns Int? but is converted back to T: Short?, Issue #558 + typeOf() -> cast().cumSum(skipNA).map { it?.toShort() }.cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() + + typeOf() -> cast().cumSum().cast() + + typeOf() -> cast().cumSum(skipNA).cast() + typeOf() -> cast().cumSum().cast() + typeOf() -> cast().cumSum(skipNA).cast() + typeOf(), typeOf() -> convertToDouble().cumSum(skipNA).cast() + + // Cumsum for empty column or column with just null is itself + nothingType, nullableNothingType -> this + else -> error("Cumsum for type ${type()} is not supported") } -private val supportedClasses = setOf(Double::class, Float::class, Int::class, Long::class, BigDecimal::class) +private val supportedClasses = setOf( + Double::class, + Float::class, + Int::class, + Byte::class, + Short::class, + Long::class, + BigInteger::class, + BigDecimal::class, +) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt index 66b51c5881..361b50f19f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt @@ -9,14 +9,13 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.api.describeImpl import kotlin.reflect.KProperty -import kotlin.reflect.KType // region DataSchema @DataSchema public interface ColumnDescription { public val name: String public val path: ColumnPath - public val type: KType + public val type: String public val count: Int public val unique: Int public val nulls: Int diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt index 59b34e8562..96f836bc0d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt @@ -11,7 +11,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.columns.values import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators -import org.jetbrains.kotlinx.dataframe.impl.aggregation.comparableColumns +import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfDelegated @@ -52,7 +52,7 @@ public inline fun > AnyRow.rowMaxOf(): T = rowMaxOfOrN // region DataFrame -public fun DataFrame.max(): DataRow = maxFor(comparableColumns()) +public fun DataFrame.max(): DataRow = maxFor(interComparableColumns()) public fun > DataFrame.maxFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.max.aggregateFor(this, columns) @@ -121,7 +121,7 @@ public fun > DataFrame.maxByOrNull(column: KProperty // region GroupBy -public fun Grouped.max(): DataFrame = maxFor(comparableColumns()) +public fun Grouped.max(): DataFrame = maxFor(interComparableColumns()) public fun > Grouped.maxFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.max.aggregateFor(this, columns) @@ -218,7 +218,7 @@ public fun > Pivot.maxBy(column: KProperty): Reduced // region PivotGroupBy -public fun PivotGroupBy.max(separate: Boolean = false): DataFrame = maxFor(separate, comparableColumns()) +public fun PivotGroupBy.max(separate: Boolean = false): DataFrame = maxFor(separate, interComparableColumns()) public fun > PivotGroupBy.maxFor( separate: Boolean = false, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt index fb57e33d77..ded0944f4a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/median.kt @@ -11,7 +11,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.cast -import org.jetbrains.kotlinx.dataframe.impl.aggregation.comparableColumns +import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf @@ -53,7 +53,7 @@ public inline fun > AnyRow.rowMedianOf(): T = // region DataFrame -public fun DataFrame.median(): DataRow = medianFor(comparableColumns()) +public fun DataFrame.median(): DataRow = medianFor(interComparableColumns()) public fun > DataFrame.medianFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.median.aggregateFor(this, columns) @@ -96,7 +96,7 @@ public inline fun > DataFrame.medianOf( // region GroupBy -public fun Grouped.median(): DataFrame = medianFor(comparableColumns()) +public fun Grouped.median(): DataFrame = medianFor(interComparableColumns()) public fun > Grouped.medianFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.median.aggregateFor(this, columns) @@ -134,7 +134,7 @@ public inline fun > Grouped.medianOf( // region Pivot -public fun Pivot.median(separate: Boolean = false): DataRow = medianFor(separate, comparableColumns()) +public fun Pivot.median(separate: Boolean = false): DataRow = medianFor(separate, interComparableColumns()) public fun > Pivot.medianFor( separate: Boolean = false, @@ -174,7 +174,7 @@ public inline fun > Pivot.medianOf( // region PivotGroupBy public fun PivotGroupBy.median(separate: Boolean = false): DataFrame = - medianFor(separate, comparableColumns()) + medianFor(separate, interComparableColumns()) public fun > PivotGroupBy.medianFor( separate: Boolean = false, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt index 2ec4be8943..42b1405372 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/min.kt @@ -11,7 +11,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.columns.values import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.Aggregators -import org.jetbrains.kotlinx.dataframe.impl.aggregation.comparableColumns +import org.jetbrains.kotlinx.dataframe.impl.aggregation.interComparableColumns import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateAll import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfDelegated @@ -52,7 +52,7 @@ public inline fun > AnyRow.rowMinOf(): T = rowMinOfOrN // region DataFrame -public fun DataFrame.min(): DataRow = minFor(comparableColumns()) +public fun DataFrame.min(): DataRow = minFor(interComparableColumns()) public fun > DataFrame.minFor(columns: ColumnsForAggregateSelector): DataRow = Aggregators.min.aggregateFor(this, columns) @@ -121,7 +121,7 @@ public fun > DataFrame.minByOrNull(column: KProperty // region GroupBy -public fun Grouped.min(): DataFrame = minFor(comparableColumns()) +public fun Grouped.min(): DataFrame = minFor(interComparableColumns()) public fun > Grouped.minFor(columns: ColumnsForAggregateSelector): DataFrame = Aggregators.min.aggregateFor(this, columns) @@ -219,7 +219,7 @@ public fun > Pivot.minBy(column: KProperty): Reduced // region PivotGroupBy -public fun PivotGroupBy.min(separate: Boolean = false): DataFrame = minFor(separate, comparableColumns()) +public fun PivotGroupBy.min(separate: Boolean = false): DataFrame = minFor(separate, interComparableColumns()) public fun > PivotGroupBy.minFor( separate: Boolean = false, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt index 9cd34f7ea9..163ff395ea 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/typeConversions.kt @@ -83,7 +83,7 @@ public fun DataColumn.asNumbers(): ValueColumn { } public fun DataColumn.asComparable(): DataColumn> { - require(isComparable()) + require(valuesAreComparable()) return this as DataColumn> } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt index 199568630a..22c4438e33 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/TypeUtils.kt @@ -11,6 +11,8 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.Infer import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType import org.jetbrains.kotlinx.dataframe.util.GUESS_VALUE_TYPE +import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KClass import kotlin.reflect.KType import kotlin.reflect.KTypeParameter @@ -29,6 +31,7 @@ import kotlin.reflect.full.superclasses import kotlin.reflect.full.withNullability import kotlin.reflect.jvm.jvmErasure import kotlin.reflect.typeOf +import kotlin.toBigDecimal as toBigDecimalKotlin internal inline fun KClass<*>.createTypeUsing() = typeOf().projectTo(this) @@ -546,12 +549,10 @@ internal fun guessValueType( internal val KType.isNothing: Boolean get() = classifier == Nothing::class -internal fun nothingType(nullable: Boolean): KType = - if (nullable) { - typeOf>() - } else { - typeOf>() - }.arguments.first().type!! +internal val nothingType: KType = typeOf>().arguments.first().type!! +internal val nullableNothingType: KType = typeOf>().arguments.first().type!! + +internal fun nothingType(nullable: Boolean): KType = if (nullable) nullableNothingType else nothingType @OptIn(ExperimentalUnsignedTypes::class) private val primitiveArrayClasses = setOf( @@ -646,3 +647,18 @@ internal fun Any.asArrayAsListOrNull(): List<*>? = is Array<*> -> asList() else -> null } + +internal fun Any.isBigNumber(): Boolean = this is BigInteger || this is BigDecimal + +internal fun Number.toBigDecimal(): BigDecimal = + when (this) { + is BigDecimal -> this + is BigInteger -> this.toBigDecimalKotlin() + is Int -> this.toBigDecimalKotlin() + is Byte -> this.toInt().toBigDecimalKotlin() + is Short -> this.toInt().toBigDecimalKotlin() + is Long -> this.toBigDecimalKotlin() + is Float -> this.toBigDecimalKotlin() + is Double -> this.toBigDecimalKotlin() + else -> BigDecimal(this.toString()) + } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt index 0f244fec4d..6f514d95eb 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/getColumns.kt @@ -5,8 +5,8 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.aggregation.Aggregatable import org.jetbrains.kotlinx.dataframe.aggregation.NamedValue import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.isNumber +import org.jetbrains.kotlinx.dataframe.api.valuesAreComparable import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType @@ -14,8 +14,8 @@ internal inline fun Aggregatable.remainingColumns( crossinline predicate: (AnyCol) -> Boolean, ): ColumnsSelector = remainingColumnsSelector().filter { predicate(it.data) } -internal fun Aggregatable.comparableColumns() = - remainingColumns { it.isComparable() } as ColumnsSelector> +internal fun Aggregatable.interComparableColumns() = + remainingColumns { it.valuesAreComparable() } as ColumnsSelector> internal fun Aggregatable.numberColumns() = remainingColumns { it.isNumber() } as ColumnsSelector diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt index dad18f4901..109b031587 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt @@ -1,17 +1,19 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.api.ColumnDescription import org.jetbrains.kotlinx.dataframe.api.add import org.jetbrains.kotlinx.dataframe.api.after +import org.jetbrains.kotlinx.dataframe.api.any import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.asComparable import org.jetbrains.kotlinx.dataframe.api.asNumbers import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.isNumber +import org.jetbrains.kotlinx.dataframe.api.map import org.jetbrains.kotlinx.dataframe.api.maxOrNull import org.jetbrains.kotlinx.dataframe.api.mean import org.jetbrains.kotlinx.dataframe.api.medianOrNull @@ -20,47 +22,26 @@ import org.jetbrains.kotlinx.dataframe.api.move import org.jetbrains.kotlinx.dataframe.api.name import org.jetbrains.kotlinx.dataframe.api.std import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.api.valuesAreComparable import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.size import org.jetbrains.kotlinx.dataframe.columns.values import org.jetbrains.kotlinx.dataframe.impl.columns.addPath import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn +import org.jetbrains.kotlinx.dataframe.impl.isBigNumber import org.jetbrains.kotlinx.dataframe.impl.renderType +import org.jetbrains.kotlinx.dataframe.impl.toBigDecimal import org.jetbrains.kotlinx.dataframe.index import org.jetbrains.kotlinx.dataframe.kind import org.jetbrains.kotlinx.dataframe.type internal fun describeImpl(cols: List): DataFrame { - fun List.collectAll(atAnyDepth: Boolean): List = - flatMap { col -> - when (col.kind) { - ColumnKind.Frame -> - col.asAnyFrameColumn() - .concat() - .columns() - .map { it.addPath(col.path() + it.name) } - .collectAll(true) + val allCols = cols.collectAll(false) - ColumnKind.Group -> - if (atAnyDepth) { - col.asColumnGroup() - .columns() - .map { it.addPath(col.path() + it.name) } - .collectAll(true) - } else { - listOf(col) - } - - ColumnKind.Value -> listOf(col) - } - } - - val all = cols.collectAll(false) - - val hasNumeric = all.any { it.isNumber() } - val hasComparable = all.any { it.isComparable() } - val hasLongPaths = all.any { it.path().size > 1 } - var df = all.toDataFrame { + val hasNumericCols = allCols.any { it.isNumber() } + val hasComparableCols = allCols.any { it.valuesAreComparable() } + val hasLongPaths = allCols.any { it.path().size > 1 } + var df = allCols.toDataFrame { ColumnDescription::name from { it.name() } if (hasLongPaths) { ColumnDescription::path from { it.path() } @@ -74,23 +55,67 @@ internal fun describeImpl(cols: List): DataFrame { .groupBy { it }.maxByOrNull { it.value.size } ?.key } - if (hasNumeric) { + if (hasNumericCols) { ColumnDescription::mean from { if (it.isNumber()) it.asNumbers().mean() else null } ColumnDescription::std from { if (it.isNumber()) it.asNumbers().std() else null } } - if (hasComparable) { - ColumnDescription::min from inferType { if (it.isComparable()) it.asComparable().minOrNull() else null } + if (hasComparableCols || hasNumericCols) { + ColumnDescription::min from inferType { + it.convertToComparableOrNull()?.minOrNull() + } ColumnDescription::median from inferType { - if (it.isComparable()) it.asComparable().medianOrNull() else null + it.convertToComparableOrNull()?.medianOrNull() + } + ColumnDescription::max from inferType { + it.convertToComparableOrNull()?.maxOrNull() } - ColumnDescription::max from inferType { if (it.isComparable()) it.asComparable().maxOrNull() else null } } } df = df.add(ColumnDescription::freq) { val top = it[ColumnDescription::top] - val data = all[index] + val data = allCols[index] data.values.count { it == top } }.move(ColumnDescription::freq).after(ColumnDescription::top) return df.cast() } + +private fun List.collectAll(atAnyDepth: Boolean): List = + flatMap { col -> + when (col.kind) { + ColumnKind.Frame -> + col.asAnyFrameColumn() + .concat() + .columns() + .map { it.addPath(col.path() + it.name) } + .collectAll(true) + + ColumnKind.Group -> + if (atAnyDepth) { + col.asColumnGroup() + .columns() + .map { it.addPath(col.path() + it.name) } + .collectAll(true) + } else { + listOf(col) + } + + ColumnKind.Value -> listOf(col) + } + } + +/** Converts a column to a comparable column if it is not already comparable. */ +private fun DataColumn.convertToComparableOrNull(): DataColumn>? = + when { + valuesAreComparable() -> asComparable() + + // Found incomparable number types, convert all to Double or BigDecimal first + isNumber() -> + if (any { it?.isBigNumber() == true }) { + map { (it as Number?)?.toBigDecimal() } + } else { + map { (it as Number?)?.toDouble() } + }.cast() + + else -> null + } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt index 6799f41ca0..730b3234aa 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/KotlinNotebookPluginUtils.kt @@ -28,10 +28,10 @@ import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.frames import org.jetbrains.kotlinx.dataframe.api.getColumn import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.sortWith import org.jetbrains.kotlinx.dataframe.api.toDataFrame import org.jetbrains.kotlinx.dataframe.api.values +import org.jetbrains.kotlinx.dataframe.api.valuesAreComparable import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator @@ -107,7 +107,7 @@ public object KotlinNotebookPluginUtils { private fun createComparator(sortKeys: List, isDesc: List): Comparator> { return Comparator { row1, row2 -> for ((key, desc) in sortKeys.zip(isDesc)) { - val comparisonResult = if (row1.df().getColumn(key).isComparable()) { + val comparisonResult = if (row1.df().getColumn(key).valuesAreComparable()) { compareComparableValues(row1, row2, key, desc) } else { compareStringValues(row1, row2, key, desc) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt index 58ef59329a..e1220c83a4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/cumsum.kt @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.isNA import org.jetbrains.kotlinx.dataframe.api.map import java.math.BigDecimal +import java.math.BigInteger internal val defaultCumSumSkipNA: Boolean = true @@ -88,6 +89,66 @@ internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): Dat } } +@JvmName("byteCumsum") +internal fun DataColumn.cumSum(): DataColumn { + var sum = 0 + return map { + sum += it + sum + } +} + +@JvmName("cumsumByteNullable") +internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { + var sum = 0 + var fillNull = false + return map { + when { + it == null -> { + if (!skipNA) fillNull = true + null + } + + fillNull -> null + + else -> { + sum += it + sum + } + } + } +} + +@JvmName("shortCumsum") +internal fun DataColumn.cumSum(): DataColumn { + var sum = 0 + return map { + sum += it + sum + } +} + +@JvmName("cumsumShortNullable") +internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { + var sum = 0 + var fillNull = false + return map { + when { + it == null -> { + if (!skipNA) fillNull = true + null + } + + fillNull -> null + + else -> { + sum += it + sum + } + } + } +} + @JvmName("longCumsum") internal fun DataColumn.cumSum(): DataColumn { var sum = 0L @@ -118,6 +179,36 @@ internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): Da } } +@JvmName("bigIntegerCumsum") +internal fun DataColumn.cumSum(): DataColumn { + var sum = BigInteger.ZERO + return map { + sum += it + sum + } +} + +@JvmName("cumsumBigIntegerNullable") +internal fun DataColumn.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn { + var sum = BigInteger.ZERO + var fillNull = false + return map { + when { + it == null -> { + if (!skipNA) fillNull = true + null + } + + fillNull -> null + + else -> { + sum += it + sum + } + } + } +} + @JvmName("bigDecimalCumsum") internal fun DataColumn.cumSum(): DataColumn { var sum = BigDecimal.ZERO diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt index c7c6e95962..40fe9bbce1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/mean.kt @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.math import org.jetbrains.kotlinx.dataframe.api.skipNA_default import org.jetbrains.kotlinx.dataframe.impl.renderType import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -10,6 +11,7 @@ import kotlin.reflect.full.withNullability internal fun Iterable.mean(type: KType, skipNA: Boolean = skipNA_default): Double = asSequence().mean(type, skipNA) +@Suppress("UNCHECKED_CAST") internal fun Sequence.mean(type: KType, skipNA: Boolean = skipNA_default): Double { if (type.isMarkedNullable) { return filterNotNull().mean(type.withNullability(false), skipNA) @@ -28,6 +30,8 @@ internal fun Sequence.mean(type: KType, skipNA: Boolean = skipNA Long::class -> (this as Sequence).map { it.toDouble() }.mean(false) + BigInteger::class -> (this as Sequence).map { it.toDouble() }.mean(false) + BigDecimal::class -> (this as Sequence).map { it.toDouble() }.mean(skipNA) Number::class -> (this as Sequence).map { it.toDouble() }.mean(skipNA) @@ -132,6 +136,21 @@ public fun Iterable.mean(): Double = if (count > 0) sum / count else Double.NaN } +// TODO result is Double, but should be BigDecimal, Issue #558 +@JvmName("bigIntegerMean") +public fun Iterable.mean(): Double = + if (this is Collection) { + if (size > 0) sumOf { it.toDouble() } / size else Double.NaN + } else { + var count = 0 + val sum = sumOf { + count++ + it.toDouble() + } + if (count > 0) sum / count else Double.NaN + } + +// TODO result is Double, but should be BigDecimal, Issue #558 @JvmName("bigDecimalMean") public fun Iterable.mean(): Double = if (this is Collection) { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt index 559b4027d6..7fe4df949b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/median.kt @@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.math import org.jetbrains.kotlinx.dataframe.impl.asList import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KType import kotlin.reflect.typeOf @@ -9,6 +10,8 @@ public inline fun > Iterable.medianOrNull(): T? = m public inline fun > Iterable.median(): T = medianOrNull()!! +// TODO median always returns the same type, but this can be confusing for iterables of even length +// TODO (e.g. median of [1, 2] should be 1.5, but the type is Int, so it returns 1), Issue #558 @PublishedApi internal inline fun > Iterable.median(type: KType): T? { val list = if (type.isMarkedNullable) filterNotNull() else (this as Iterable).asList() @@ -19,14 +22,22 @@ internal inline fun > Iterable.median(type: KType) return when (type.classifier) { Double::class -> ((list.quickSelect(index - 1) as Double + list.quickSelect(index) as Double) / 2.0) as T + Float::class -> ((list.quickSelect(index - 1) as Float + list.quickSelect(index) as Float) / 2.0f) as T + Int::class -> ((list.quickSelect(index - 1) as Int + list.quickSelect(index) as Int) / 2) as T + Short::class -> ((list.quickSelect(index - 1) as Short + list.quickSelect(index) as Short) / 2) as T + Long::class -> ((list.quickSelect(index - 1) as Long + list.quickSelect(index) as Long) / 2L) as T Byte::class -> ((list.quickSelect(index - 1) as Byte + list.quickSelect(index) as Byte) / 2).toByte() as T BigDecimal::class -> ( - (list.quickSelect(index - 1) as BigDecimal + list.quickSelect(index) as BigDecimal) / BigDecimal(2) + (list.quickSelect(index - 1) as BigDecimal + list.quickSelect(index) as BigDecimal) / 2.toBigDecimal() + ) as T + + BigInteger::class -> ( + (list.quickSelect(index - 1) as BigInteger + list.quickSelect(index) as BigInteger) / 2.toBigInteger() ) as T else -> list.quickSelect(index - 1) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/minmax.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/minmax.kt deleted file mode 100644 index fb0f0871a0..0000000000 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/minmax.kt +++ /dev/null @@ -1,25 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.math - -import java.math.BigDecimal -import kotlin.reflect.KType -import kotlin.reflect.jvm.jvmErasure - -internal fun Iterable.min(type: KType) = - when (type.jvmErasure) { - Double::class -> (this as Iterable).minOrNull() - Float::class -> (this as Iterable).minOrNull() - Int::class, Short::class, Byte::class -> (this as Iterable).minOrNull() - Long::class -> (this as Iterable).minOrNull() - BigDecimal::class -> (this as Iterable).minOrNull() - else -> throw IllegalArgumentException() - } - -internal fun Iterable.max(type: KType) = - when (type.jvmErasure) { - Double::class -> (this as Iterable).maxOrNull() - Float::class -> (this as Iterable).maxOrNull() - Int::class, Short::class, Byte::class -> (this as Iterable).maxOrNull() - Long::class -> (this as Iterable).maxOrNull() - BigDecimal::class -> (this as Iterable).maxOrNull() - else -> throw IllegalArgumentException() - } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt index 119f0e2257..ab38f8c9c0 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/std.kt @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.api.ddof_default import org.jetbrains.kotlinx.dataframe.api.skipNA_default import org.jetbrains.kotlinx.dataframe.impl.renderType import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -26,6 +27,8 @@ internal fun Iterable.std( Int::class, Short::class, Byte::class -> (this as Iterable).std(ddof) Long::class -> (this as Iterable).std(ddof) BigDecimal::class -> (this as Iterable).std(ddof) + BigInteger::class -> (this as Iterable).std(ddof) + Number::class -> (this as Iterable).map { it.toDouble() }.std(skipNA, ddof) Nothing::class -> Double.NaN else -> throw IllegalArgumentException("Unable to compute the std for type ${renderType(type)}") } @@ -47,3 +50,6 @@ public fun Iterable.std(ddof: Int = ddof_default): Double = varianceAndMea @JvmName("bigDecimalStd") public fun Iterable.std(ddof: Int = ddof_default): Double = varianceAndMean().std(ddof) + +@JvmName("bigIntegerStd") +public fun Iterable.std(ddof: Int = ddof_default): Double = varianceAndMean().std(ddof) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/stdMean.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/stdMean.kt index ee95c6d1ae..8deb8c1eff 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/stdMean.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/stdMean.kt @@ -4,6 +4,7 @@ package org.jetbrains.kotlinx.dataframe.math import org.jetbrains.kotlinx.dataframe.api.skipNA_default import java.math.BigDecimal +import java.math.BigInteger import kotlin.math.sqrt public data class BasicStats(val count: Int, val mean: Double, val variance: Double) { @@ -114,3 +115,20 @@ public fun Iterable.varianceAndMean(): BasicStats { } return BasicStats(count, mean.toDouble(), variance.toDouble()) } + +@JvmName("bigIntegerVarianceAndMean") +public fun Iterable.varianceAndMean(): BasicStats { + var count = 0 + var sum = BigInteger.ZERO + for (element in this) { + sum += element + count++ + } + val mean = sum.toDouble() / count + var variance = .0 + for (element in this) { + val diff = element.toDouble() - mean + variance += diff * diff + } + return BasicStats(count, mean, variance) +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt index d26c155a34..08dae78937 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt @@ -1,6 +1,7 @@ package org.jetbrains.kotlinx.dataframe.math import java.math.BigDecimal +import java.math.BigInteger import kotlin.reflect.KType @PublishedApi @@ -11,10 +12,29 @@ internal fun Iterable.sumOf(type: KType, selector: (T) -> R?) } return when (type.classifier) { Double::class -> sumOf(selector as ((T) -> Double)) as R + + // careful, conversion to Double to Float occurs! TODO, Issue #558 + Float::class -> sumOf { (selector as ((T) -> Float))(it).toDouble() }.toFloat() as R + Int::class -> sumOf(selector as ((T) -> Int)) as R + + // careful, conversion to Int occurs! TODO, Issue #558 + Short::class -> sumOf { (selector as ((T) -> Short))(it).toInt() }.toShort() as R + + // careful, conversion to Int occurs! TODO, Issue #558 + Byte::class -> sumOf { (selector as ((T) -> Byte))(it).toInt() }.toByte() as R + Long::class -> sumOf(selector as ((T) -> Long)) as R + BigDecimal::class -> sumOf(selector as ((T) -> BigDecimal)) as R - else -> TODO() + + BigInteger::class -> sumOf(selector as ((T) -> BigInteger)) as R + + Number::class -> sumOf { (selector as ((T) -> Number))(it).toDouble() } as R + + Nothing::class -> 0.0 as R + + else -> throw IllegalArgumentException("sumOf is not supported for $type") } } @@ -22,11 +42,28 @@ internal fun Iterable.sumOf(type: KType, selector: (T) -> R?) internal fun Iterable.sum(type: KType): T = when (type.classifier) { Double::class -> (this as Iterable).sum() as T + Float::class -> (this as Iterable).sum() as T - Int::class, Short::class, Byte::class -> (this as Iterable).sum() as T + + Int::class -> (this as Iterable).sum() as T + + // TODO result should be Int, but same type as input is returned, Issue #558 + Short::class -> (this as Iterable).sum().toShort() as T + + // TODO result should be Int, but same type as input is returned, Issue #558 + Byte::class -> (this as Iterable).sum().toByte() as T + Long::class -> (this as Iterable).sum() as T + BigDecimal::class -> (this as Iterable).sum() as T - else -> throw IllegalArgumentException("Sum is not supported for $type") + + BigInteger::class -> (this as Iterable).sum() as T + + Number::class -> (this as Iterable).map { it.toDouble() }.sum() as T + + Nothing::class -> 0.0 as T + + else -> throw IllegalArgumentException("sum is not supported for $type") } @JvmName("sumNullableT") @@ -34,11 +71,28 @@ internal fun Iterable.sum(type: KType): T = internal fun Iterable.sum(type: KType): T = when (type.classifier) { Double::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + Float::class -> (this as Iterable).asSequence().filterNotNull().sum() as T - Int::class, Short::class, Byte::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + + Int::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + + // TODO result should be Int, but same type as input is returned, Issue #558 + Short::class -> (this as Iterable).asSequence().filterNotNull().sum().toShort() as T + + // TODO result should be Int, but same type as input is returned, Issue #558 + Byte::class -> (this as Iterable).asSequence().filterNotNull().sum().toByte() as T + Long::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + BigDecimal::class -> (this as Iterable).asSequence().filterNotNull().sum() as T - else -> TODO() + + BigInteger::class -> (this as Iterable).asSequence().filterNotNull().sum() as T + + Number::class -> (this as Iterable).asSequence().filterNotNull().map { it.toDouble() }.sum() as T + + Nothing::class -> 0.0 as T + + else -> throw IllegalArgumentException("sum is not supported for $type") } @PublishedApi @@ -58,3 +112,21 @@ internal fun Sequence.sum(): BigDecimal { } return sum } + +@PublishedApi +internal fun Iterable.sum(): BigInteger { + var sum: BigInteger = BigInteger.ZERO + for (element in this) { + sum += element + } + return sum +} + +@PublishedApi +internal fun Sequence.sum(): BigInteger { + var sum: BigInteger = BigInteger.ZERO + for (element in this) { + sum += element + } + return sum +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt index b25e7fb604..237f7041b6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt @@ -9,7 +9,7 @@ package org.jetbrains.kotlinx.dataframe.util // region WARNING in 0.15, ERROR in 0.16 -private const val MESSAGE_0_16 = "Will be removed in 0.16." +private const val MESSAGE_0_16 = "Will be ERROR in 0.16." internal const val DF_READ_NO_CSV = "This function is deprecated and should be replaced with `readCSV`. $MESSAGE_0_16" internal const val DF_READ_NO_CSV_REPLACE = @@ -44,11 +44,16 @@ internal const val PARSER_OPTIONS = "This constructor is only here for binary co internal const val PARSER_OPTIONS_COPY = "This function is only here for binary compatibility. $MESSAGE_0_16" +internal const val IS_COMPARABLE = + "This function is replaced by `valuesAreComparable()` to better reflect its purpose. $MESSAGE_0_16" +internal const val IS_COMPARABLE_REPLACE = "valuesAreComparable()" +internal const val IS_INTER_COMPARABLE_IMPORT = "org.jetbrains.kotlinx.dataframe.api.valuesAreComparable" + // endregion // region WARNING in 0.16, ERROR in 0.17 -private const val MESSAGE_0_17 = "Will be removed in 0.17." +private const val MESSAGE_0_17 = "Will be ERROR in 0.17." // endregion diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt index a418e87445..1b27387a34 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt @@ -1,6 +1,7 @@ package org.jetbrains.kotlinx.dataframe.api import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.alsoDebug import org.junit.Test class DescribeTests { @@ -11,4 +12,60 @@ class DescribeTests { val df = dataFrameOf(a).drop(1) df.describe()["min"][0] shouldBe null } + + @Test + fun `describe nullable Number column`() { + val a by columnOf( + 1, + 2.0, + 3f, + 4L, + 5.toShort(), + 6.toByte(), + 7.toBigInteger(), + 8.toBigDecimal(), + null, + ) + val df = dataFrameOf(a) + val describe = df.describe() + .alsoDebug() + .single() + with(describe) { + name shouldBe "a" + type shouldBe "Number?" + count shouldBe 9 + unique shouldBe 9 + nulls shouldBe 1 + top shouldBe 1 + freq shouldBe 1 + mean shouldBe 4.5 + std shouldBe 2.449489742783178 + min shouldBe 1.toBigDecimal() + median shouldBe 4.toBigDecimal() + max shouldBe 8.toBigDecimal() + } + } + + @Test + fun `describe with NaNs`() { + val a by columnOf(1.0, 2.0, Double.NaN, 4.0) + val df = dataFrameOf(a) + val describe = df.describe() + .alsoDebug() + .single() + with(describe) { + name shouldBe "a" + type shouldBe "Double" + count shouldBe 4 + unique shouldBe 4 + nulls shouldBe 0 + top shouldBe 1 + freq shouldBe 1 + mean.isNaN() shouldBe true + std.isNaN() shouldBe true + min shouldBe 1.0 // TODO should be NaN too? + median shouldBe 3.0 + max.isNaN shouldBe true + } + } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt index 385023eda8..73282e6125 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.api.concat import org.jetbrains.kotlinx.dataframe.api.cumSum import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.groupBy +import org.jetbrains.kotlinx.dataframe.api.map import org.junit.Test @Suppress("ktlint:standard:argument-list-wrapping") @@ -22,6 +23,44 @@ class CumsumTests { col.cumSum(skipNA = false).toList() shouldBe expectedNoSkip } + @Test + fun `short column`() { + col.map { it?.toShort() }.cumSum().toList() shouldBe expected + col.map { it?.toShort() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip + } + + @Test + fun `frame with multiple columns`() { + val col2 by columnOf(1.toShort(), 2, 3, 4, 5) + val col3 by columnOf(1.toByte(), 2, 3, 4, null) + val df = dataFrameOf(col, col2, col3) + val res = df.cumSum(skipNA = false) + + res[col].toList() shouldBe expectedNoSkip + res[col2].toList() shouldBe listOf(1.toShort(), 3, 6, 10, 15) + res[col3].toList() shouldBe listOf(1.toByte(), 3, 6, 10, null) + } + + @Test + fun `byte column`() { + col.map { it?.toByte() }.cumSum().toList() shouldBe expected.map { it?.toByte() } + col.map { it?.toByte() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip.map { it?.toByte() } + } + + @Test + fun `big int column`() { + col.map { it?.toBigInteger() }.cumSum().toList() shouldBe expected.map { it?.toBigInteger() } + col.map { it?.toBigInteger() }.cumSum(skipNA = false) + .toList() shouldBe expectedNoSkip.map { it?.toBigInteger() } + } + + @Test + fun `big decimal column`() { + col.map { it?.toBigDecimal() }.cumSum().toList() shouldBe expected.map { it?.toBigDecimal() } + col.map { it?.toBigDecimal() }.cumSum(skipNA = false) + .toList() shouldBe expectedNoSkip.map { it?.toBigDecimal() } + } + @Test fun frame() { val str by columnOf("a", "b", "c", "d", "e") diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt index 4d403a173c..34f9548b9f 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt @@ -28,6 +28,19 @@ class StdTests { df.std().columnTypes().single() shouldBe typeOf() } + @Test + fun `std one byte column`() { + val value by columnOf(1.toByte(), 2.toByte(), 3.toByte()) + val df = dataFrameOf(value) + val expected = 1.0 + + value.values().std(typeOf()) shouldBe expected + value.std() shouldBe expected + df[value].std() shouldBe expected + df.std { value } shouldBe expected + df.std().columnTypes().single() shouldBe typeOf() + } + @Test fun `std one double column`() { val value by columnOf(1.0, 2.0, 3.0) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt new file mode 100644 index 0000000000..b93d8ab705 --- /dev/null +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/sum.kt @@ -0,0 +1,64 @@ +package org.jetbrains.kotlinx.dataframe.statistics + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.sum +import org.jetbrains.kotlinx.dataframe.api.sumOf +import org.junit.Test + +class SumTests { + + @Test + fun `test single column`() { + val value by columnOf(1, 2, 3) + val df = dataFrameOf(value) + val expected = 6 + + value.values().sum() shouldBe expected + value.sum() shouldBe expected + df[value].sum() shouldBe expected + df.sum { value } shouldBe expected + df.sum()[value] shouldBe expected + df.sumOf { value() } shouldBe expected + } + + @Test + fun `test single short column`() { + val value by columnOf(1.toShort(), 2.toShort(), 3.toShort()) + val df = dataFrameOf(value) + val expected = 6 + + value.values().sum() shouldBe expected + value.sum() shouldBe expected + df[value].sum() shouldBe expected + df.sum { value } shouldBe expected + df.sum()[value] shouldBe expected + df.sumOf { value() } shouldBe expected + } + + @Test + fun `test multiple columns`() { + val value1 by columnOf(1, 2, 3) + val value2 by columnOf(4.0, 5.0, 6.0) + val value3: DataColumn by columnOf(7.0, 8, null) + val df = dataFrameOf(value1, value2, value3) + val expected1 = 6 + val expected2 = 15.0 + val expected3 = 15.0 + + df.sum()[value1] shouldBe expected1 + df.sum()[value2] shouldBe expected2 + df.sum()[value3] shouldBe expected3 + df.sumOf { value1() } shouldBe expected1 + df.sumOf { value2() } shouldBe expected2 + df.sumOf { value3() } shouldBe expected3 + df.sum(value1) shouldBe expected1 + df.sum(value2) shouldBe expected2 + df.sum(value3) shouldBe expected3 + df.sum { value1 } shouldBe expected1 + df.sum { value2 } shouldBe expected2 + df.sum { value3 } shouldBe expected3 + } +} diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt index 63ee6c5e03..b8e5d61c0c 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt @@ -81,7 +81,6 @@ import org.jetbrains.kotlinx.dataframe.api.intoColumns import org.jetbrains.kotlinx.dataframe.api.intoList import org.jetbrains.kotlinx.dataframe.api.intoRows import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.isComparable import org.jetbrains.kotlinx.dataframe.api.isEmpty import org.jetbrains.kotlinx.dataframe.api.isFrameColumn import org.jetbrains.kotlinx.dataframe.api.isNA @@ -156,6 +155,7 @@ import org.jetbrains.kotlinx.dataframe.api.ungroup import org.jetbrains.kotlinx.dataframe.api.update import org.jetbrains.kotlinx.dataframe.api.value import org.jetbrains.kotlinx.dataframe.api.values +import org.jetbrains.kotlinx.dataframe.api.valuesAreComparable import org.jetbrains.kotlinx.dataframe.api.valuesNotNull import org.jetbrains.kotlinx.dataframe.api.where import org.jetbrains.kotlinx.dataframe.api.with @@ -2445,12 +2445,12 @@ class DataFrameTests : BaseTest() { ComparableTest(2, 2, "b", "b", "2", "2"), ).toDataFrame() - df.int.isComparable() shouldBe true - df.comparableInt.isComparable() shouldBe true - df.string.isComparable() shouldBe true - df.comparableString.isComparable() shouldBe true - df.comparableStar.isComparable() shouldBe false - df.comparableNothing.isComparable() shouldBe false + df.int.valuesAreComparable() shouldBe true + df.comparableInt.valuesAreComparable() shouldBe true + df.string.valuesAreComparable() shouldBe true + df.comparableString.valuesAreComparable() shouldBe true + df.comparableStar.valuesAreComparable() shouldBe false + df.comparableNothing.valuesAreComparable() shouldBe false } @Test