Skip to content

Commit

Permalink
Merge pull request #1040 from Kotlin/stabilize-fast-double-parser
Browse files Browse the repository at this point in the history
Stabilize FastDoubleParser part 1
  • Loading branch information
Jolanrensen authored Feb 12, 2025
2 parents 90d8153 + 08570f9 commit 6f28237
Show file tree
Hide file tree
Showing 9 changed files with 469 additions and 122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.toLocalDateTime
import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime
import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.io.toDataFrame
import java.math.BigDecimal
import java.math.BigInteger
Expand Down Expand Up @@ -226,8 +227,8 @@ public fun DataColumn<String>.convertToDouble(locale: Locale? = null): DataColum
* @include [DataColumnStringConvertToDoubleDoc]
* @param nullStrings a set of strings that should be treated as `null` values.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
* @param useFastDoubleParser whether to use [FastDoubleParser].
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `true`.
*/
@JvmName("convertToDoubleFromString")
public fun DataColumn<String>.convertToDouble(
Expand All @@ -246,8 +247,8 @@ public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColu
* @include [DataColumnStringConvertToDoubleDoc]
* @param nullStrings a set of strings that should be treated as `null` values.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `["null", "NULL", "NA", "N/A"]`.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
* @param useFastDoubleParser whether to use [FastDoubleParser].
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `true`.
*/
@JvmName("convertToDoubleFromStringNullable")
public fun DataColumn<String?>.convertToDouble(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.io.readCSV
import org.jetbrains.kotlinx.dataframe.typeClass
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
Expand Down Expand Up @@ -45,6 +46,12 @@ public fun <T, C> DataFrame<T>.parse(vararg columns: ColumnReference<C>, options
/**
 * Overload of `parse` that takes the target [columns] as [KProperty] references.
 *
 * The properties are turned into a column set and forwarded, together with [options],
 * to the selector-based `parse` overload.
 *
 * @param columns property references identifying the columns to pass on.
 * @param options optional [ParserOptions] forwarded unchanged; `null` by default.
 * @return the [DataFrame] returned by the delegated `parse` call.
 */
public fun <T, C> DataFrame<T>.parse(vararg columns: KProperty<C>, options: ParserOptions? = null): DataFrame<T> {
    return parse(options) { columns.toColumnSet() }
}

/**
* Global counterpart of [ParserOptions].
* Settings changed here will affect the defaults for all parsing operations.
*
* The default values are set by [Parsers.resetToDefault].
*/
public interface GlobalParserOptions {

public fun addDateTimePattern(pattern: String)
Expand All @@ -54,7 +61,7 @@ public interface GlobalParserOptions {
/** This function can be called to skip some types. Parsing will be attempted for all other types. */
public fun addSkipType(type: KType)

/** Whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now. */
/** Whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter. */
public var useFastDoubleParser: Boolean

public fun resetToDefault()
Expand Down Expand Up @@ -91,7 +98,7 @@ public interface GlobalParserOptions {
* `["null", "NULL", "NA", "N/A"]`.
* @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types.
* By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
* @param useFastDoubleParser whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter.
*/
public class ParserOptions(
public val locale: Locale? = null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.parser
import org.jetbrains.kotlinx.dataframe.api.to
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
import org.jetbrains.kotlinx.dataframe.columns.size
Expand All @@ -47,6 +48,7 @@ import java.time.format.DateTimeFormatterBuilder
import java.time.temporal.Temporal
import java.time.temporal.TemporalQuery
import java.util.Locale
import kotlin.properties.Delegates
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.withNullability
Expand Down Expand Up @@ -114,6 +116,13 @@ internal class StringParserWithFormat<T>(
}
}

/**
* Central implementation for [GlobalParserOptions].
*
* Can be obtained by a user by calling [DataFrame.parser][DataFrame.Companion.parser].
*
* Defaults are set by [resetToDefault].
*/
internal object Parsers : GlobalParserOptions {

private val formatters: MutableList<DateTimeFormatter> = mutableListOf()
Expand All @@ -140,7 +149,7 @@ internal object Parsers : GlobalParserOptions {
skipTypesSet.add(type)
}

override var useFastDoubleParser: Boolean = false
override var useFastDoubleParser by Delegates.notNull<Boolean>()

private var _locale: Locale? = null

Expand All @@ -165,7 +174,7 @@ internal object Parsers : GlobalParserOptions {
.toFormatter()
.let { formatters.add(it) }

useFastDoubleParser = false
useFastDoubleParser = true
_locale = null
nullStrings.addAll(listOf("null", "NULL", "NA", "N/A"))
}
Expand Down
Loading

0 comments on commit 6f28237

Please sign in to comment.