Skip to content

Commit

Permalink
adding tests and comments, missing types for median, expanding cumSum
Browse files Browse the repository at this point in the history
  • Loading branch information
Jolanrensen committed Nov 13, 2024
1 parent b2bdb4a commit 05e0993
Show file tree
Hide file tree
Showing 9 changed files with 197 additions and 38 deletions.
18 changes: 14 additions & 4 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,21 @@ public fun <T : Number?> DataColumn<T>.cumSum(skipNA: Boolean = defaultCumSumSki

typeOf<Float?>() -> cast<Float?>().cumSum(skipNA).cast()

// careful, cast to Int can occur! TODO
typeOf<Int>(), typeOf<Byte>(), typeOf<Short>() -> cast<Int>().cumSum().cast()
typeOf<Int>() -> cast<Int>().cumSum().cast()

// careful, cast to Int can occur! TODO
typeOf<Int?>(), typeOf<Byte?>(), typeOf<Short?>() -> cast<Int?>().cumSum(skipNA).cast()
// TODO cumSum for Byte returns Int but is cast back to T: Byte
typeOf<Byte>() -> cast<Byte>().cumSum().cast()

// TODO cumSum for Short returns Int but is cast back to T: Short
typeOf<Short>() -> cast<Short>().cumSum().cast()

typeOf<Int?>() -> cast<Int?>().cumSum(skipNA).cast()

// TODO cumSum for Byte? returns Int? but is cast back to T: Byte?
typeOf<Byte?>() -> cast<Byte?>().cumSum(skipNA).cast()

// TODO cumSum for Short? returns Int? but is cast back to T: Short?
typeOf<Short?>() -> cast<Short?>().cumSum(skipNA).cast()

typeOf<Long>() -> cast<Long>().cumSum().cast()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,66 @@ internal fun DataColumn<Int?>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): Dat
}
}

/**
 * Cumulative sum over a non-null [Byte] column.
 *
 * The running total is accumulated as an [Int], so the produced column holds [Int] values
 * rather than [Byte]s.
 *
 * @return a column in which each element is the sum of the values seen so far, including the current one.
 */
@JvmName("byteCumsum")
internal fun DataColumn<Byte>.cumSum(): DataColumn<Int> {
    var runningTotal = 0
    return map { value ->
        // Widen explicitly; the accumulator is an Int.
        runningTotal += value.toInt()
        runningTotal
    }
}

/**
 * Cumulative sum over a nullable [Byte] column, accumulated as [Int].
 *
 * A `null` input always produces a `null` output at that position.
 *
 * @param skipNA when `true`, `null`s are stepped over and the running total continues with the
 *   next non-null value; when `false`, every position after the first `null` is `null` as well.
 * @return a column of running totals (or `null`s, as described above).
 */
@JvmName("cumsumByteNullable")
internal fun DataColumn<Byte?>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<Int?> {
    var runningTotal = 0
    // Set once a null has been seen while skipNA is off; from then on only nulls are emitted.
    var nullSeen = false
    return map { value ->
        if (value == null) {
            if (!skipNA) nullSeen = true
            null
        } else if (nullSeen) {
            null
        } else {
            runningTotal += value.toInt()
            runningTotal
        }
    }
}

/**
 * Cumulative sum over a non-null [Short] column.
 *
 * The running total is accumulated as an [Int], so the produced column holds [Int] values
 * rather than [Short]s.
 *
 * @return a column in which each element is the sum of the values seen so far, including the current one.
 */
@JvmName("shortCumsum")
internal fun DataColumn<Short>.cumSum(): DataColumn<Int> {
    var runningTotal = 0
    return map { value ->
        // Widen explicitly; the accumulator is an Int.
        runningTotal += value.toInt()
        runningTotal
    }
}

/**
 * Cumulative sum over a nullable [Short] column, accumulated as [Int].
 *
 * A `null` input always produces a `null` output at that position.
 *
 * @param skipNA when `true`, `null`s are stepped over and the running total continues with the
 *   next non-null value; when `false`, every position after the first `null` is `null` as well.
 * @return a column of running totals (or `null`s, as described above).
 */
@JvmName("cumsumShortNullable")
internal fun DataColumn<Short?>.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataColumn<Int?> {
    var runningTotal = 0
    // Set once a null has been seen while skipNA is off; from then on only nulls are emitted.
    var nullSeen = false
    return map { value ->
        if (value == null) {
            if (!skipNA) nullSeen = true
            null
        } else if (nullSeen) {
            null
        } else {
            runningTotal += value.toInt()
            runningTotal
        }
    }
}

@JvmName("longCumsum")
internal fun DataColumn<Long>.cumSum(): DataColumn<Long> {
var sum = 0L
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ public fun Iterable<Long>.mean(): Double =
if (count > 0) sum / count else Double.NaN
}

// TODO result is Double, but should be BigDecimal, Issue #558
@JvmName("bigIntegerMean")
public fun Iterable<BigInteger>.mean(): Double =
if (this is Collection) {
Expand All @@ -148,6 +149,7 @@ public fun Iterable<BigInteger>.mean(): Double =
if (count > 0) sum / count else Double.NaN
}

// TODO result is Double, but should be BigDecimal, Issue #558
@JvmName("bigDecimalMean")
public fun Iterable<BigDecimal>.mean(): Double =
if (this is Collection) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@ package org.jetbrains.kotlinx.dataframe.math

import org.jetbrains.kotlinx.dataframe.impl.asList
import java.math.BigDecimal
import java.math.BigInteger
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
 * Median of the elements of this iterable, computed by the `median(type)` overload using the
 * reified element type [T].
 *
 * NOTE(review): presumably returns `null` when there are no elements to take a median of —
 * confirm against the `median(type)` overload.
 */
public inline fun <reified T : Comparable<T>> Iterable<T>.medianOrNull(): T? = median(typeOf<T>())

/**
 * Same as [medianOrNull], but with a non-null result.
 *
 * @throws NullPointerException if [medianOrNull] returns `null` (the result is unwrapped with `!!`).
 */
public inline fun <reified T : Comparable<T>> Iterable<T>.median(): T = medianOrNull()!!

// TODO median always returns the same type, but this can be confusing for iterables of even length
// TODO (e.g. median of [1, 2] should be 1.5, but the type is Int, so it returns 1), Issue #558
@PublishedApi
internal inline fun <reified T : Comparable<T>> Iterable<T?>.median(type: KType): T? {
val list = if (type.isMarkedNullable) filterNotNull() else (this as Iterable<T>).asList()
Expand All @@ -19,14 +22,22 @@ internal inline fun <reified T : Comparable<T>> Iterable<T?>.median(type: KType)
return when (type.classifier) {
Double::class -> ((list.quickSelect(index - 1) as Double + list.quickSelect(index) as Double) / 2.0) as T

Float::class -> ((list.quickSelect(index - 1) as Float + list.quickSelect(index) as Float) / 2.0f) as T

Int::class -> ((list.quickSelect(index - 1) as Int + list.quickSelect(index) as Int) / 2) as T

Short::class -> ((list.quickSelect(index - 1) as Short + list.quickSelect(index) as Short) / 2) as T

Long::class -> ((list.quickSelect(index - 1) as Long + list.quickSelect(index) as Long) / 2L) as T

Byte::class -> ((list.quickSelect(index - 1) as Byte + list.quickSelect(index) as Byte) / 2).toByte() as T

BigDecimal::class -> (
(list.quickSelect(index - 1) as BigDecimal + list.quickSelect(index) as BigDecimal) / BigDecimal(2)
(list.quickSelect(index - 1) as BigDecimal + list.quickSelect(index) as BigDecimal) / 2.toBigDecimal()
) as T

BigInteger::class -> (
(list.quickSelect(index - 1) as BigInteger + list.quickSelect(index) as BigInteger) / 2.toBigInteger()
) as T

else -> list.quickSelect(index - 1)
Expand Down

This file was deleted.

18 changes: 14 additions & 4 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/math/sum.kt
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,13 @@ internal fun <T : Number> Iterable<T>.sum(type: KType): T =

Float::class -> (this as Iterable<Float>).sum() as T

// careful, cast to Int occurs! TODO
Int::class, Short::class, Byte::class -> (this as Iterable<Int>).sum() as T
Int::class -> (this as Iterable<Int>).sum() as T

// TODO result should be Int, but same type as input is returned, Issue #558
Short::class -> (this as Iterable<Short>).sum().toShort() as T

// TODO result should be Int, but same type as input is returned, Issue #558
Byte::class -> (this as Iterable<Byte>).sum().toByte() as T

Long::class -> (this as Iterable<Long>).sum() as T

Expand All @@ -69,8 +74,13 @@ internal fun <T : Number> Iterable<T?>.sum(type: KType): T =

Float::class -> (this as Iterable<Float?>).asSequence().filterNotNull().sum() as T

// careful, cast to Int occurs! TODO
Int::class, Short::class, Byte::class -> (this as Iterable<Int?>).asSequence().filterNotNull().sum() as T
Int::class -> (this as Iterable<Int?>).asSequence().filterNotNull().sum() as T

// TODO result should be Int, but same type as input is returned, Issue #558
Short::class -> (this as Iterable<Short?>).asSequence().filterNotNull().sum().toShort() as T

// TODO result should be Int, but same type as input is returned, Issue #558
// The elements are Bytes here, so view them as Iterable<Byte?> (not Short?) before dropping nulls
// and summing; this mirrors the non-null Byte branch.
Byte::class -> (this as Iterable<Byte?>).asSequence().filterNotNull().sum().toByte() as T

Long::class -> (this as Iterable<Long?>).asSequence().filterNotNull().sum() as T

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,26 @@ class CumsumTests {

/**
 * cumSum over a Short column accumulates as Int, so the results are compared against the
 * Int-based expectations directly (no mapping of the expected values back to Short).
 */
@Test
fun `short column`() {
    col.map { it?.toShort() }.cumSum().toList() shouldBe expected
    col.map { it?.toShort() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip
}

// Runs cumSum(skipNA = false) on a frame holding `col` plus a Short-based and a Byte-based column,
// then checks the cumulative values of each column separately.
@Test
fun `frame with multiple columns`() {
// The delegated property names (col2, col3) are used to look the columns up again below.
val col2 by columnOf(1.toShort(), 2, 3, 4, 5)
// Ends with a null so the null handling of the Byte path is exercised too.
val col3 by columnOf(1.toByte(), 2, 3, 4, null)
val df = dataFrameOf(col, col2, col3)
val res = df.cumSum(skipNA = false)

res[col].toList() shouldBe expectedNoSkip
res[col2].toList() shouldBe listOf(1, 3, 6, 10, 15)
// The trailing null input yields a null output at the same position.
res[col3].toList() shouldBe listOf(1, 3, 6, 10, null)
}

/**
 * cumSum over a Byte column accumulates as Int, so the results are compared against the
 * Int-based expectations directly (no mapping of the expected values back to Byte).
 */
@Test
fun `byte column`() {
    col.map { it?.toByte() }.cumSum().toList() shouldBe expected
    col.map { it?.toByte() }.cumSum(skipNA = false).toList() shouldBe expectedNoSkip
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ class StdTests {
df.std().columnTypes().single() shouldBe typeOf<Double>()
}

// Standard deviation of a single Byte column (1, 2, 3), checked through each std entry point used
// below: raw values with an explicit type, the column itself, the column fetched from the frame,
// a column selector, and the frame-wide std (whose result column must be typed Double).
@Test
fun `std one byte column`() {
val value by columnOf(1.toByte(), 2.toByte(), 3.toByte())
val df = dataFrameOf(value)
val expected = 1.0

value.values().std(typeOf<Byte>()) shouldBe expected
value.std() shouldBe expected
df[value].std() shouldBe expected
df.std { value } shouldBe expected
// The result type is Double even though the input column holds Bytes.
df.std().columnTypes().single() shouldBe typeOf<Double>()
}

@Test
fun `std one double column`() {
val value by columnOf(1.0, 2.0, 3.0)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package org.jetbrains.kotlinx.dataframe.statistics

import io.kotest.matchers.shouldBe
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.api.columnOf
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.sum
import org.jetbrains.kotlinx.dataframe.api.sumOf
import org.junit.Test

/**
 * Tests for `sum` / `sumOf`, exercising the different ways a sum can be requested:
 * on raw values, on a column, on a column taken from a frame, via a column selector,
 * via the frame-wide `sum()`, and via a row expression.
 */
class SumTests {

// Int column 1, 2, 3: every entry point must give 6.
@Test
fun `test single column`() {
val value by columnOf(1, 2, 3)
val df = dataFrameOf(value)
val expected = 6

value.values().sum() shouldBe expected
value.sum() shouldBe expected
df[value].sum() shouldBe expected
df.sum { value } shouldBe expected
df.sum()[value] shouldBe expected
df.sumOf { value() } shouldBe expected
}

// Same checks as above, but with a Short column; the expected value is still written as the Int 6.
@Test
fun `test single short column`() {
val value by columnOf(1.toShort(), 2.toShort(), 3.toShort())
val df = dataFrameOf(value)
val expected = 6

value.values().sum() shouldBe expected
value.sum() shouldBe expected
df[value].sum() shouldBe expected
df.sum { value } shouldBe expected
df.sum()[value] shouldBe expected
df.sumOf { value() } shouldBe expected
}

// Three columns of different element types in one frame: Int, Double, and a nullable mixed
// Number column (7.0, 8, null) whose expected sum is 15.0.
@Test
fun `test multiple columns`() {
val value1 by columnOf(1, 2, 3)
val value2 by columnOf(4.0, 5.0, 6.0)
val value3: DataColumn<Number?> by columnOf(7.0, 8, null)
val df = dataFrameOf(value1, value2, value3)
val expected1 = 6
val expected2 = 15.0
val expected3 = 15.0

// Frame-wide sum, read back per column.
df.sum()[value1] shouldBe expected1
df.sum()[value2] shouldBe expected2
df.sum()[value3] shouldBe expected3
// Row-expression based sum.
df.sumOf { value1() } shouldBe expected1
df.sumOf { value2() } shouldBe expected2
df.sumOf { value3() } shouldBe expected3
// Sum of a column passed directly.
df.sum(value1) shouldBe expected1
df.sum(value2) shouldBe expected2
df.sum(value3) shouldBe expected3
// Sum via a column selector.
df.sum { value1 } shouldBe expected1
df.sum { value2 } shouldBe expected2
df.sum { value3 } shouldBe expected3
}


}

0 comments on commit 05e0993

Please sign in to comment.