Skip to content

Commit

Permalink
set useFastDoubleParser parser option to true by default and updated …
Browse files Browse the repository at this point in the history
…KDocs
  • Loading branch information
Jolanrensen committed Jan 29, 2025
1 parent 8cc6c00 commit 141312a
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.io.readCSV
import org.jetbrains.kotlinx.dataframe.typeClass
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
Expand Down Expand Up @@ -45,6 +46,12 @@ public fun <T, C> DataFrame<T>.parse(vararg columns: ColumnReference<C>, options
/**
 * Runs `parse` on the columns referred to by the given property references.
 *
 * Delegates to the selector-based `parse` overload, turning [columns] into a column set
 * via `toColumnSet()`. The [options] argument is passed along unchanged.
 */
public fun <T, C> DataFrame<T>.parse(vararg columns: KProperty<C>, options: ParserOptions? = null): DataFrame<T> {
    return parse(options) { columns.toColumnSet() }
}

/**
* Global counterpart of [ParserOptions].
* Settings changed here will affect the defaults for all parsing operations.
*
* The default values are set by [Parsers.resetToDefault].
*/
public interface GlobalParserOptions {

public fun addDateTimePattern(pattern: String)
Expand All @@ -54,7 +61,7 @@ public interface GlobalParserOptions {
/** This function can be called to skip some types. Parsing will be attempted for all other types. */
public fun addSkipType(type: KType)

/** Whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now. */
/** Whether to use [FastDoubleParser]. Defaults to `true`. Please report any issues you encounter. */
public var useFastDoubleParser: Boolean

public fun resetToDefault()
Expand Down Expand Up @@ -91,7 +98,7 @@ public interface GlobalParserOptions {
* `["null", "NULL", "NA", "N/A"]`.
* @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types.
* By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
* @param useFastDoubleParser whether to use [FastDoubleParser]; defaults to `true`. Please report any issues you encounter.
*/
public class ParserOptions(
public val locale: Locale? = null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.api.parser
import org.jetbrains.kotlinx.dataframe.api.to
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
import org.jetbrains.kotlinx.dataframe.columns.size
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
import org.jetbrains.kotlinx.dataframe.hasNulls
import org.jetbrains.kotlinx.dataframe.impl.asNullable
import org.jetbrains.kotlinx.dataframe.impl.canParse
import org.jetbrains.kotlinx.dataframe.impl.catchSilent
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
Expand All @@ -47,6 +49,7 @@ import java.time.format.DateTimeFormatterBuilder
import java.time.temporal.Temporal
import java.time.temporal.TemporalQuery
import java.util.Locale
import kotlin.properties.Delegates
import kotlin.reflect.KClass
import kotlin.reflect.KType
import kotlin.reflect.full.withNullability
Expand Down Expand Up @@ -114,6 +117,13 @@ internal class StringParserWithFormat<T>(
}
}

/**
* Central implementation for [GlobalParserOptions].
*
* Can be obtained by a user by calling [DataFrame.parser][DataFrame.Companion.parser].
*
* Defaults are set by [resetToDefault].
*/
internal object Parsers : GlobalParserOptions {

private val formatters: MutableList<DateTimeFormatter> = mutableListOf()
Expand All @@ -140,7 +150,7 @@ internal object Parsers : GlobalParserOptions {
skipTypesSet.add(type)
}

override var useFastDoubleParser: Boolean = false
override var useFastDoubleParser by Delegates.notNull<Boolean>()

private var _locale: Locale? = null

Expand All @@ -165,7 +175,7 @@ internal object Parsers : GlobalParserOptions {
.toFormatter()
.let { formatters.add(it) }

useFastDoubleParser = false
useFastDoubleParser = true
_locale = null
nullStrings.addAll(listOf("null", "NULL", "NA", "N/A"))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ private val NANS = arrayOf("nan", "na", "n/a")
/**
* Parses a [String]/[CharSequence], [CharArray], or [ByteArray] into a [Double].
*
* If [ParserOptions.useFastDoubleParser] is enabled, it will try to parse the input with an _EXPERIMENTAL_
* fast double parser, [FastDoubleParser](https://github.com/wrandelshofer/FastDoubleParser).
* If [ParserOptions.useFastDoubleParser] is enabled, it will try to parse the input with the
* fast double parser library, [FastDoubleParser](https://github.com/wrandelshofer/FastDoubleParser).
* If that option is disabled, or if the fast parser fails, [NumberFormat] is used to parse the input instead.
*
* Public, so it can be used in other modules.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,6 @@ internal object DelimParams {
* ([DataFrame.parser][DataFrame.Companion.parser]) will be queried.
*
* The only exceptions are:
* - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`,
* regardless of the global setting.
* - [nullStrings][ParserOptions.nullStrings], which, if `null`,
* will take the global setting + {@include [DefaultNullStringsContentLink]}.
* - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,7 @@ internal fun readDelimImpl(
): DataFrame<*> {
// set up the csv specs
val csvSpecs = with(CsvSpecs.builder()) {
// turn on fast double parser if not explicitly set regardless of the global parser options
@Suppress("NullableBooleanElvis")
val adjustedParserOptions = (parserOptions ?: ParserOptions())
.copy(useFastDoubleParser = parserOptions?.useFastDoubleParser ?: true)
customDoubleParser(DataFrameCustomDoubleParser(adjustedParserOptions))
customDoubleParser(DataFrameCustomDoubleParser(parserOptions))

// use the given nullStrings if provided, else take the global ones + some extras
val nullStrings = parserOptions?.nullStrings ?: (DataFrame.parser.nulls + DEFAULT_DELIM_NULL_STRINGS)
Expand Down

0 comments on commit 141312a

Please sign in to comment.