Skip to content

Commit

Permalink
added tests for issue 761, fixed allColumnsExceptKeepingStructure
Browse files Browse the repository at this point in the history
  • Loading branch information
Jolanrensen committed Jan 29, 2025
1 parent 651cbae commit 073e1bb
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ public interface AllExceptColumnsSelectionDsl {
/**
 * Creates a [ColumnSet] that resolves both the receiver and [other] against the same context
 * and hands the two results to `allColumnsExceptKeepingStructure`, which yields the receiver's
 * columns with the columns resolved from [other] excluded.
 *
 * The created column set is cast back to `ColumnSet<C>` before being returned.
 */
internal fun <C> ColumnSet<C>.exceptInternal(other: ColumnsResolver<*>): ColumnSet<C> =
createColumnSet { context ->
val resolvedCols = this.resolve(context)
// NOTE(review): the next two lines look like the before/after pair of this diff hunk.
// The `.toSet()` form is the one that matches a `Set` parameter on
// `allColumnsExceptKeepingStructure`; confirm only that line is kept in the real file.
val resolvedColsToExcept = other.resolve(context)
val resolvedColsToExcept = other.resolve(context).toSet()
resolvedCols.allColumnsExceptKeepingStructure(resolvedColsToExcept)
} as ColumnSet<C>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,59 +416,66 @@ internal fun List<ColumnWithPath<*>>.allColumnsExceptAndUnpack(
* Empty groups will be removed if [removeEmptyGroups]` == true`
*/
internal fun List<ColumnWithPath<*>>.allColumnsExceptKeepingStructure(
// NOTE(review): the two `columns` lines look like the before/after pair of this diff hunk
// (`Iterable` presumably the old parameter type, `Set` the new one) — confirm against the real file.
columns: Iterable<ColumnWithPath<*>>,
columns: Set<ColumnWithPath<*>>,
removeEmptyGroups: Boolean = true,
): List<ColumnWithPath<*>> {
// nothing to filter when the receiver holds no columns
if (isEmpty()) return emptyList()
// NOTE(review): from here down to the first `adjustedCurrent` line this appears to be the
// pre-commit body: a single tree is collected for the whole receiver list and the columns
// to except are visited in their given order.
val fullTree = collectTree()
for (columnToExcept in columns) {
// grab the node representing the column from the tree
val nodeToExcept = fullTree.getOrPut(columnToExcept.path).asNullable()
if (nodeToExcept != null) {
// remove the children from the node (if it's a column group) and remove its data (the column itself)
nodeToExcept.allChildren().forEach { it.data = null }
nodeToExcept.data = null

// we need to update the data of the parent node(s) to reflect the removal of the column
if (nodeToExcept.parent != null) {
// we grab the data of the parent node, which should be a column group
// treat it as a DF to remove the column to except from it and
// convert it back to a column group
val current = nodeToExcept.parent.data as ColumnGroup<*>? ?: continue
val adjustedCurrent = current
.remove(nodeToExcept.name)
.asColumnGroup(current.name)
.addPath(current.path())

// remove the group if it's empty and removeEmptyGroups is true
// else, simply update the parent's data with the adjusted column group
nodeToExcept.parent.data =
if (adjustedCurrent.cols().isEmpty() && removeEmptyGroups) {
null
} else {
adjustedCurrent
// NOTE(review): from here on this appears to be the post-commit body.
// Each receiver column is processed on its own: a separate tree is collected from a
// one-element list and the per-column results are concatenated by `flatMap`.
return flatMap {
val fullTree = listOf(it).collectTree()
// columns to except are visited longest path first;
// inside `sortedByDescending` the implicit `it` is the column to except, not the flatMap element
for (columnToExcept in columns.sortedByDescending { it.path.size }) {
// grab the node representing the column from the tree
val nodeToExcept = fullTree.getOrPut(columnToExcept.path).asNullable()
if (nodeToExcept != null) {
// remove the children from the node (if it's a column group) and remove its data (the column itself)
nodeToExcept.allChildren().forEach { it.data = null }
nodeToExcept.data = null

// we need to update the data of the parent node(s) to reflect the removal of the column
if (nodeToExcept.parent != null) {
// we grab the data of the parent node, which should be a column group
// treat it as a DF to remove the column to except from it and
// convert it back to a column group
val current = nodeToExcept.parent.data as ColumnGroup<*>? ?: continue
val adjustedCurrent = current
.remove(nodeToExcept.name)
.asColumnGroup(current.name)
.addPath(current.path())

// remove the group if it's empty and removeEmptyGroups is true
// else, simply update the parent's data with the adjusted column group
nodeToExcept.parent.data =
if (adjustedCurrent.cols().isEmpty() && removeEmptyGroups) {
null
} else {
adjustedCurrent
}

// now we update the parent's parents recursively with new column group instances
var parent = nodeToExcept.parent.parent

@Suppress("UNNECESSARY_NOT_NULL_ASSERTION")
var currentNode = nodeToExcept.parent!!
// walk upwards until there is no parent left or a parent carries no column-group data
while (parent != null) {
val parentData = parent.data as ColumnGroup<*>? ?: break
val currentData = currentNode.data
// a child whose data was set to null is removed from its parent group;
// otherwise the parent's entry is replaced by the child's updated data
val modifiedParentData =
if (currentData == null) {
parentData.remove(currentNode.name)
} else {
parentData.replace(currentNode.name).with { currentData }
}
parent.data = modifiedParentData
.asColumnGroup(parentData.name)
.addPath(parentData.path())
currentNode = parent
parent = parent.parent
}

// NOTE(review): the block below appears to be the pre-commit version of the same
// parent-update loop; it always replaces the child and asserts `currentNode.data!!`,
// with no branch for a child whose data is null.
// now we update the parent's parents recursively with new column group instances
var parent = nodeToExcept.parent.parent

@Suppress("UNNECESSARY_NOT_NULL_ASSERTION")
var currentNode = nodeToExcept.parent!!
while (parent != null) {
val parentData = parent.data as ColumnGroup<*>? ?: break
parent.data = parentData
.replace(currentNode.name).with { currentNode.data!! }
.asColumnGroup(parentData.name)
.addPath(parentData.path())

currentNode = parent
parent = parent.parent
}
}
}
// collect the topmost nodes that still carry data and re-attach each node's path from the root
val subtrees = fullTree.topmostChildren { it.data != null }
subtrees.map { it.data!!.addPath(it.pathFromRoot()) }
}
// NOTE(review): the next two lines appear to be the pre-commit form of the result computation.
val subtrees = fullTree.topmostChildren { it.data != null }
return subtrees.map { it.data!!.addPath(it.pathFromRoot()) }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,24 @@ import org.junit.Test

class AllExceptTests : ColumnsSelectionDslTests() {

@Test
fun `issue 761`() {
    // Rename every column at any depth except name.firstName:
    // all names are upper-cased, only firstName keeps its original spelling.
    val upperCased = df
        .rename { colsAtAnyDepth() except name.firstName }
        .into { column -> column.name.uppercase() }
    upperCased.columnNames() shouldBe listOf("NAME", "AGE", "CITY", "WEIGHT", "ISHAPPY")
    upperCased.getColumnGroup("NAME").columnNames() shouldBe listOf("firstName", "LASTNAME")

    // Build a nested frame by turning the dotted column names into paths.
    val nested = dataFrameOf("a.b", "a.c.d", "d.e", "d.f")(1, 3.0, 2, "b")
        .move { all() }
        .into { column -> column.name.split(".").toPath() }

    // Excepting a.b: group "a" no longer has "b", while a.c.d is still reachable.
    val withoutB = nested.select { cols("a") except "a"["b"] }
    withoutB.getColumnGroup("a").getColumnOrNull("b") shouldBe null
    withoutB[pathOf("a", "c", "d")].single() shouldBe 3.0

    // Excepting a.c.d: group "a" no longer has "c", while a.b is still reachable.
    val withoutD = nested.select { cols("a") except "a"["c"]["d"] }
    withoutD.getColumnGroup("a").getColumnOrNull("c") shouldBe null
    withoutD[pathOf("a", "b")].single() shouldBe 1
}

@Test
fun `exceptions`() {
shouldThrow<IllegalStateException> {
Expand Down Expand Up @@ -70,12 +88,15 @@ class AllExceptTests : ColumnsSelectionDslTests() {
).shouldAllBeEqual()

listOf(
df.select { name and name.firstName }.alsoDebug(),
df.select { cols(name) except name.firstName },
df.select { (name and name.firstName and name.firstName) except name.firstName },
df.select { (name and name and name.firstName).except(name.firstName).simplify() },
).shouldAllBeEqual()

df.select { (name and name.firstName and name.firstName) except name.firstName }.alsoDebug()

df.select { (name and name and name.firstName) except name.firstName }.alsoDebug()
df.getColumns { (name and name and name.firstName).except(name.firstName) }.forEach {
it.isColumnGroup() shouldBe true
it.asColumnGroup().columnNames() shouldBe listOf("lastName")
}
}

@Test
Expand Down

0 comments on commit 073e1bb

Please sign in to comment.