From 28088ea92b13d390b8cb3f0dd9ebb0115219b3c1 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Fri, 31 Jan 2025 13:16:59 +0200 Subject: [PATCH 1/3] [Compiler plugin] Support for dropNA --- .../jetbrains/kotlinx/dataframe/api/Nulls.kt | 2 ++ .../plugin/impl/api/{dropNulls.kt => Nulls.kt} | 13 ++++++++++++- .../kotlinx/dataframe/plugin/loadInterpreter.kt | 2 ++ plugins/kotlin-dataframe/testData/box/dropNA.kt | 17 +++++++++++++++++ .../DataFrameBlackBoxCodegenTestGenerated.java | 6 ++++++ 5 files changed, 39 insertions(+), 1 deletion(-) rename plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/{dropNulls.kt => Nulls.kt} (78%) create mode 100644 plugins/kotlin-dataframe/testData/box/dropNA.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index b1deac533d..76aaefe695 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -435,6 +435,8 @@ private interface CommonDropNAFunctionDoc * @include [DropNA.WhereAllNAParam] * @include [DropDslParam] */ +@Refine +@Interpretable("DropNa0") public fun DataFrame.dropNA(whereAllNA: Boolean = false, columns: ColumnsSelector): DataFrame { val cols = this[columns] return if (whereAllNA) { diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/dropNulls.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/Nulls.kt similarity index 78% rename from plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/dropNulls.kt rename to plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/Nulls.kt index 0977904f74..dc481f1090 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/dropNulls.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/Nulls.kt @@ -9,10 +9,10 @@ import org.jetbrains.kotlinx.dataframe.plugin.extensions.Marker import org.jetbrains.kotlinx.dataframe.plugin.impl.AbstractSchemaModificationInterpreter import org.jetbrains.kotlinx.dataframe.plugin.impl.Arguments import org.jetbrains.kotlinx.dataframe.plugin.impl.PluginDataFrameSchema +import org.jetbrains.kotlinx.dataframe.plugin.impl.Present import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleCol import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleColumnGroup import org.jetbrains.kotlinx.dataframe.plugin.impl.SimpleDataColumn -import org.jetbrains.kotlinx.dataframe.plugin.impl.data.ColumnWithPathApproximation import org.jetbrains.kotlinx.dataframe.plugin.impl.dataFrame class DropNulls0 : AbstractSchemaModificationInterpreter() { @@ -24,6 +24,17 @@ class DropNulls0 : AbstractSchemaModificationInterpreter() { } } +class DropNa0 : AbstractSchemaModificationInterpreter() { + val Arguments.receiver: PluginDataFrameSchema by dataFrame() + val Arguments.whereAllNA: Boolean by arg(defaultValue = Present(false)) + val Arguments.columns: ColumnsResolver by arg() + + override fun Arguments.interpret(): PluginDataFrameSchema { + if (whereAllNA) return receiver + return PluginDataFrameSchema(fillNullsImpl(receiver.columns(), columns.resolve(receiver).mapTo(mutableSetOf()) { it.path.path }, emptyList())) + } +} + fun KotlinTypeFacade.fillNullsImpl( columns: List, paths: Set>, diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt index 33f642992d..4d3db8733f 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/loadInterpreter.kt @@ -83,6 +83,7 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameBuilderInvoke0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameOf0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataFrameOf3 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DataRowReadJsonStr +import org.jetbrains.kotlinx.dataframe.plugin.impl.api.DropNa0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.FillNulls0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.Flatten0 import org.jetbrains.kotlinx.dataframe.plugin.impl.api.FlattenDefault @@ -223,6 +224,7 @@ internal inline fun String.load(): T { "Into0" -> Into0() "Ungroup0" -> Ungroup0() "DropNulls0" -> DropNulls0() + "DropNa0" -> DropNa0() "Properties0" -> Properties0() "Preserve0" -> Preserve0() "Preserve1" -> Preserve1() diff --git a/plugins/kotlin-dataframe/testData/box/dropNA.kt b/plugins/kotlin-dataframe/testData/box/dropNA.kt new file mode 100644 index 0000000000..f8864ff9ca --- /dev/null +++ b/plugins/kotlin-dataframe/testData/box/dropNA.kt @@ -0,0 +1,17 @@ +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.* + +fun box(): String { + val df = dataFrameOf( + "a" to listOf(1, null, 3), + "b" to listOf(null, 5, 6) + ) + val df1 = df.dropNA { a and b } + df1.compareSchemas(strict = true) + + val df2 = df.dropNA(whereAllNA = true) { a and b } + df2.compareSchemas(strict = true) + return "OK" +} diff --git a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java index d736fc4081..db310032c5 100644 --- a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java +++ b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java @@ -130,6 +130,12 @@ public void testDiff() { runTest("testData/box/diff.kt"); } + @Test + @TestMetadata("dropNA.kt") + public void testDropNA() { + runTest("testData/box/dropNA.kt"); + } + @Test @TestMetadata("dropNulls.kt") public void testDropNulls() { From e9f6ea0b06b4ea4a75d0ac221b2324883c0b49cc Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Fri, 31 Jan 2025 14:54:35 +0200 Subject: [PATCH 2/3] [Compiler plugin] Add ability to extract schema from implicit this receiver --- .../dataframe/plugin/impl/api/select.kt | 5 ++++ .../kotlinx/dataframe/plugin/interpret.kt | 2 +- .../box/schemaFromImplicitReceiver.kt | 19 +++++++++++++++ .../testData/box/selectColsOf.kt | 23 +++++++++++++++++++ ...DataFrameBlackBoxCodegenTestGenerated.java | 12 ++++++++++ 5 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 plugins/kotlin-dataframe/testData/box/schemaFromImplicitReceiver.kt create mode 100644 plugins/kotlin-dataframe/testData/box/selectColsOf.kt diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/select.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/select.kt index c21f79e122..ede178eaa5 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/select.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/select.kt @@ -13,6 +13,7 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.data.ColumnPathApproximation import org.jetbrains.kotlinx.dataframe.plugin.impl.data.ColumnWithPathApproximation import org.jetbrains.kotlinx.dataframe.plugin.impl.dataFrame import org.jetbrains.kotlinx.dataframe.plugin.impl.enum +import org.jetbrains.kotlinx.dataframe.plugin.impl.ignore import org.jetbrains.kotlinx.dataframe.plugin.impl.type internal class Select0 : AbstractInterpreter() { @@ -25,6 +26,7 @@ internal class Select0 : AbstractInterpreter() { } internal class Expr0 : AbstractInterpreter() { + val Arguments.receiver by ignore() val Arguments.name: String by arg(defaultValue = Present("untitled")) val Arguments.infer: Infer by enum(defaultValue = Present(Infer.Nulls)) val Arguments.expression: TypeApproximation by type() @@ -53,6 +55,7 @@ internal class And0 : AbstractInterpreter() { } internal class All0 : AbstractInterpreter() { + val Arguments.receiver by ignore() override fun Arguments.interpret(): ColumnsResolver { return object : ColumnsResolver { override fun resolve(df: PluginDataFrameSchema): List { @@ -66,6 +69,7 @@ internal class All0 : AbstractInterpreter() { } internal class ColsOf0 : AbstractInterpreter() { + val Arguments.receiver by ignore() val Arguments.typeArg0: TypeApproximation by arg() override fun Arguments.interpret(): ColumnsResolver { @@ -90,6 +94,7 @@ private fun Arguments.colsOf(cols: List, type: Cone } internal class ColsAtAnyDepth0 : AbstractInterpreter() { + val Arguments.receiver by ignore() override fun Arguments.interpret(): ColumnsResolver { return object : ColumnsResolver { override fun resolve(df: PluginDataFrameSchema): List { diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/interpret.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/interpret.kt index fe135e62a5..2c8fe0098f 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/interpret.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/interpret.kt @@ -505,7 +505,7 @@ internal fun FirFunctionCall.collectArgumentExpressions(): RefinedArguments { val refinedArgument = mutableListOf() val parameterName = Name.identifier("receiver") - explicitReceiver?.let { + (explicitReceiver ?: extensionReceiver)?.let { if (it is FirResolvedQualifier && it.resolvedToCompanionObject) { return@let } diff --git a/plugins/kotlin-dataframe/testData/box/schemaFromImplicitReceiver.kt b/plugins/kotlin-dataframe/testData/box/schemaFromImplicitReceiver.kt new file mode 100644 index 0000000000..1337551428 --- /dev/null +++ b/plugins/kotlin-dataframe/testData/box/schemaFromImplicitReceiver.kt @@ -0,0 +1,19 @@ +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.* + +data class Nested(val d: Double) + +data class Record(val a: String, val b: Int, val nested: Nested) + +fun box(): String { + val df = dataFrameOf("a", "b", "c")(1, 2, 3) + + df.groupBy { a } + .updateGroups { remove { a } } + .aggregate { c into "c" } + return "OK" +} + + diff --git a/plugins/kotlin-dataframe/testData/box/selectColsOf.kt b/plugins/kotlin-dataframe/testData/box/selectColsOf.kt new file mode 100644 index 0000000000..b52eda56d2 --- /dev/null +++ b/plugins/kotlin-dataframe/testData/box/selectColsOf.kt @@ -0,0 +1,23 @@ +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.io.* + +@DataSchema +interface JoinLeaf { + val something: Int + val somethingElse: String +} + +@DataSchema +interface Join2 { + val c: DataRow +} + +fun selectionDsl(df: DataFrame) { + df.ungroup { c }.select { colsOf() }.somethingElse +} + +fun box(): String { + return "OK" +} diff --git a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java index db310032c5..0a27b916a7 100644 --- a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java +++ b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java @@ -454,12 +454,24 @@ public void testSchema() { runTest("testData/box/Schema.kt"); } + @Test + @TestMetadata("schemaFromImplicitReceiver.kt") + public void testSchemaFromImplicitReceiver() { + runTest("testData/box/schemaFromImplicitReceiver.kt"); + } + @Test @TestMetadata("select.kt") public void testSelect() { runTest("testData/box/select.kt"); } + @Test + @TestMetadata("selectColsOf.kt") + public void testSelectColsOf() { + runTest("testData/box/selectColsOf.kt"); + } + @Test @TestMetadata("selectIt.kt") public void testSelectIt() { From 64f495afdb6ab1bdf12068369b7867d1d0c2c7a5 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Fri, 31 Jan 2025 14:57:11 +0200 Subject: [PATCH 3/3] [Compiler plugin] Support DF operations in AggregateGroupedBody scope --- .../aggregation/AggregateGroupedDsl.kt | 3 +++ .../testData/box/modifySchemaInAggregate.kt | 26 +++++++++++++++++++ ...DataFrameBlackBoxCodegenTestGenerated.java | 6 +++++ 3 files changed, 35 insertions(+) create mode 100644 plugins/kotlin-dataframe/testData/box/modifySchemaInAggregate.kt diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateGroupedDsl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateGroupedDsl.kt index 43b529522c..6558c459e8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateGroupedDsl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aggregation/AggregateGroupedDsl.kt @@ -1,3 +1,6 @@ package org.jetbrains.kotlinx.dataframe.aggregation +import org.jetbrains.kotlinx.dataframe.annotations.HasSchema + +@HasSchema(schemaArg = 0) public abstract class AggregateGroupedDsl : AggregateDsl() diff --git a/plugins/kotlin-dataframe/testData/box/modifySchemaInAggregate.kt b/plugins/kotlin-dataframe/testData/box/modifySchemaInAggregate.kt new file mode 100644 index 0000000000..a9f427b8b9 --- /dev/null +++ b/plugins/kotlin-dataframe/testData/box/modifySchemaInAggregate.kt @@ -0,0 +1,26 @@ +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.io.* + +data class Name(val firstName: String, val lastName: String) + +data class Score(val subject: String, val value: Int) + +data class Student(val name: Name, val age: Int, val scores: List) + +fun box(): String { + val students = listOf( + Student(Name("Alice", "Cooper"), 15, listOf(Score("math", 4), Score("biology", 3))), + Student(Name("Bob", "Marley"), 20, listOf(Score("music", 5))), + ) + + val df = students.toDataFrame().groupBy { expr { name.firstName} } + .aggregate { + remove { age } into "a" + } + + df.compareSchemas(strict = true) + return "OK" +} diff --git a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java index 0a27b916a7..26644a4601 100644 --- a/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java +++ b/plugins/kotlin-dataframe/tests-gen/org/jetbrains/kotlin/fir/dataframe/DataFrameBlackBoxCodegenTestGenerated.java @@ -292,6 +292,12 @@ public void testMapToFrame() { runTest("testData/box/mapToFrame.kt"); } + @Test + @TestMetadata("modifySchemaInAggregate.kt") + public void testModifySchemaInAggregate() { + runTest("testData/box/modifySchemaInAggregate.kt"); + } + @Test @TestMetadata("moveAfter.kt") public void testMoveAfter() {