Skip to content

Commit 439f65d

Browse files
Automated commit of generated code
1 parent 903f58b commit 439f65d

File tree

8 files changed

+626
-103
lines changed

8 files changed

+626
-103
lines changed

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

+35-10
Original file line numberDiff line numberDiff line change
@@ -184,22 +184,47 @@ public fun <T : Any> DataColumn<T>.convertToDouble(): DataColumn<Double> = conve
184184

185185
public fun <T : Any> DataColumn<T?>.convertToDouble(): DataColumn<Double?> = convertTo()
186186

187+
/** Parses a String column to Double considering locale (number format).
188+
* If [locale] parameter is defined, its number format is used for parsing.
189+
* If [locale] parameter is null, the current system locale is used.
190+
* If the column cannot be parsed, then the POSIX format is used. */
191+
@JvmName("convertToDoubleFromString")
192+
public fun DataColumn<String>.convertToDouble(locale: Locale? = null): DataColumn<Double> =
193+
convertToDouble(locale = locale, useFastDoubleParser = false)
194+
187195
/**
188-
* Parse String column to Double considering locale (number format).
196+
* Parses a String column to Double considering locale (number format).
189197
* If [locale] parameter is defined, its number format is used for parsing.
190-
* If [locale] parameter is null, the current system locale is used. If column can not be parsed, then POSIX format is used.
198+
* If [locale] parameter is null, the current system locale is used.
199+
* If the column cannot be parsed, then the POSIX format is used.
200+
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
191201
*/
192202
@JvmName("convertToDoubleFromString")
193-
public fun DataColumn<String>.convertToDouble(locale: Locale? = null): DataColumn<Double> =
194-
this.castToNullable().convertToDouble(locale).castToNotNullable()
203+
public fun DataColumn<String>.convertToDouble(
204+
locale: Locale? = null,
205+
useFastDoubleParser: Boolean,
206+
): DataColumn<Double> = this.castToNullable().convertToDouble(locale, useFastDoubleParser).castToNotNullable()
207+
208+
/** Parses a String column to Double considering locale (number format).
209+
* If [locale] parameter is defined, its number format is used for parsing.
210+
* If [locale] parameter is null, the current system locale is used.
211+
* If the column cannot be parsed, then the POSIX format is used. */
212+
@JvmName("convertToDoubleFromStringNullable")
213+
public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColumn<Double?> =
214+
convertToDouble(locale = locale, useFastDoubleParser = false)
195215

196216
/**
197-
* Parse String column to Double considering locale (number format).
217+
* Parses a String column to Double considering locale (number format).
198218
* If [locale] parameter is defined, its number format is used for parsing.
199-
* If [locale] parameter is null, the current system locale is used. If column can not be parsed, then POSIX format is used.
219+
* If [locale] parameter is null, the current system locale is used.
220+
* If the column cannot be parsed, then the POSIX format is used.
221+
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
200222
*/
201223
@JvmName("convertToDoubleFromStringNullable")
202-
public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColumn<Double?> {
224+
public fun DataColumn<String?>.convertToDouble(
225+
locale: Locale? = null,
226+
useFastDoubleParser: Boolean,
227+
): DataColumn<Double?> {
203228
fun applyParser(parser: (String) -> Double?): DataColumn<Double?> {
204229
var currentRow = 0
205230
try {
@@ -220,14 +245,14 @@ public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColu
220245
}
221246

222247
return if (locale != null) {
223-
val explicitParser = Parsers.getDoubleParser(locale)
248+
val explicitParser = Parsers.getDoubleParser(locale, useFastDoubleParser)
224249
applyParser(explicitParser)
225250
} else {
226251
try {
227-
val defaultParser = Parsers.getDoubleParser()
252+
val defaultParser = Parsers.getDoubleParser(useFastDoubleParser = useFastDoubleParser)
228253
applyParser(defaultParser)
229254
} catch (e: TypeConversionException) {
230-
val posixParser = Parsers.getDoubleParser(Locale.forLanguageTag("C.UTF-8"))
255+
val posixParser = Parsers.getDoubleParser(Locale.forLanguageTag("C.UTF-8"), useFastDoubleParser)
231256
applyParser(posixParser)
232257
}
233258
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

+57
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
1111
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
1212
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
1313
import org.jetbrains.kotlinx.dataframe.typeClass
14+
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
15+
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY
1416
import java.time.format.DateTimeFormatter
1517
import java.util.Locale
1618
import kotlin.reflect.KProperty
@@ -40,13 +42,68 @@ public interface GlobalParserOptions {
4042
public var locale: Locale
4143
}
4244

45+
/**
46+
* ### Options for parsing [String]`?` columns
47+
*
48+
* @param locale locale to use for parsing dates and numbers, defaults to the System default locale.
49+
* If specified instead of [dateTimeFormatter], it will be used in combination with [dateTimePattern]
50+
* to create a [DateTimeFormatter]. Just providing [locale] will not allow you to parse
51+
* locale-specific dates!
52+
* @param dateTimeFormatter a [DateTimeFormatter] to use for parsing dates, if not specified, it will be created
53+
* from [dateTimePattern] and [locale]. If neither [dateTimeFormatter] nor [dateTimePattern] is specified,
54+
* [DateTimeFormatter.ISO_LOCAL_DATE_TIME] will be used.
55+
* @param dateTimePattern a pattern to use for parsing dates. If specified instead of [dateTimeFormatter],
56+
* it will be used to create a [DateTimeFormatter].
57+
* @param nullStrings a set of strings that should be treated as `null` values. By default, it's
58+
* ["null", "NULL", "NA", "N/A"].
59+
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
60+
*/
4361
public data class ParserOptions(
4462
val locale: Locale? = null,
4563
// TODO, migrate to kotlinx.datetime.format.DateTimeFormat? https://github.com/Kotlin/dataframe/issues/876
4664
val dateTimeFormatter: DateTimeFormatter? = null,
4765
val dateTimePattern: String? = null,
4866
val nullStrings: Set<String>? = null,
67+
val useFastDoubleParser: Boolean = false,
4968
) {
69+
70+
/** For binary compatibility. */
71+
@Deprecated(
72+
message = PARSER_OPTIONS,
73+
level = DeprecationLevel.HIDDEN,
74+
)
75+
public constructor(
76+
locale: Locale? = null,
77+
dateTimeFormatter: DateTimeFormatter? = null,
78+
dateTimePattern: String? = null,
79+
nullStrings: Set<String>? = null,
80+
) : this(
81+
locale = locale,
82+
dateTimeFormatter = dateTimeFormatter,
83+
dateTimePattern = dateTimePattern,
84+
nullStrings = nullStrings,
85+
useFastDoubleParser = false,
86+
)
87+
88+
/** For binary compatibility. */
89+
@Deprecated(
90+
message = PARSER_OPTIONS_COPY,
91+
level = DeprecationLevel.HIDDEN,
92+
)
93+
public fun copy(
94+
locale: Locale? = this.locale,
95+
dateTimeFormatter: DateTimeFormatter? = this.dateTimeFormatter,
96+
dateTimePattern: String? = this.dateTimePattern,
97+
nullStrings: Set<String>? = this.nullStrings,
98+
): ParserOptions =
99+
ParserOptions(
100+
locale = locale,
101+
dateTimeFormatter = dateTimeFormatter,
102+
dateTimePattern = dateTimePattern,
103+
nullStrings = nullStrings,
104+
useFastDoubleParser = useFastDoubleParser,
105+
)
106+
50107
internal fun getDateTimeFormatter(): DateTimeFormatter? =
51108
when {
52109
dateTimeFormatter != null -> dateTimeFormatter

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt

-25
Original file line numberDiff line numberDiff line change
@@ -14,31 +14,6 @@ import kotlin.annotation.AnnotationTarget.TYPE
1414
import kotlin.annotation.AnnotationTarget.TYPEALIAS
1515
import kotlin.annotation.AnnotationTarget.VALUE_PARAMETER
1616

17-
/**
18-
*
19-
* &nbsp;&nbsp;&nbsp;&nbsp;
20-
*
21-
*/
22-
internal interface LineBreak
23-
24-
/** &nbsp; */
25-
internal interface QuarterIndent
26-
27-
/** &nbsp;&nbsp; */
28-
internal interface HalfIndent
29-
30-
/** &nbsp;&nbsp;&nbsp;&nbsp; */
31-
internal interface Indent
32-
33-
/** &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; */
34-
internal interface DoubleIndent
35-
36-
/** &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; */
37-
internal interface TripleIndent
38-
39-
/** &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; */
40-
internal interface QuadrupleIndent
41-
4217
/**
4318
* Any `Documentable` annotated with this annotation will be excluded from the generated sources by
4419
* the documentation processor.

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

+10-29
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,13 @@ import org.jetbrains.kotlinx.dataframe.hasNulls
3434
import org.jetbrains.kotlinx.dataframe.impl.canParse
3535
import org.jetbrains.kotlinx.dataframe.impl.catchSilent
3636
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
37+
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
3738
import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse
3839
import org.jetbrains.kotlinx.dataframe.io.isURL
3940
import org.jetbrains.kotlinx.dataframe.io.readJsonStr
4041
import org.jetbrains.kotlinx.dataframe.values
4142
import java.math.BigDecimal
4243
import java.net.URL
43-
import java.text.NumberFormat
4444
import java.text.ParsePosition
4545
import java.time.format.DateTimeFormatter
4646
import java.time.format.DateTimeFormatterBuilder
@@ -275,29 +275,6 @@ internal object Parsers : GlobalParserOptions {
275275
null
276276
}
277277

278-
private fun String.parseDouble(format: NumberFormat) =
279-
when (uppercase(Locale.getDefault())) {
280-
"NAN" -> Double.NaN
281-
282-
"INF" -> Double.POSITIVE_INFINITY
283-
284-
"-INF" -> Double.NEGATIVE_INFINITY
285-
286-
"INFINITY" -> Double.POSITIVE_INFINITY
287-
288-
"-INFINITY" -> Double.NEGATIVE_INFINITY
289-
290-
else -> {
291-
val parsePosition = ParsePosition(0)
292-
val result: Double? = format.parse(this, parsePosition)?.toDouble()
293-
if (parsePosition.index != this.length) {
294-
null
295-
} else {
296-
result
297-
}
298-
}
299-
}
300-
301278
inline fun <reified T : Any> stringParser(
302279
catch: Boolean = false,
303280
coveredBy: Set<KType> = emptySet(),
@@ -317,11 +294,15 @@ internal object Parsers : GlobalParserOptions {
317294
): StringParserWithFormat<T> = StringParserWithFormat(typeOf<T>(), coveredBy, body)
318295

319296
private val parserToDoubleWithOptions = stringParserWithOptions { options ->
320-
val numberFormat = NumberFormat.getInstance(options?.locale ?: Locale.getDefault())
321-
val parser = { it: String -> it.parseDouble(numberFormat) }
297+
val fastDoubleParser = FastDoubleParser(options ?: ParserOptions())
298+
val parser = { it: String -> fastDoubleParser.parseOrNull(it) }
322299
parser
323300
}
324301

302+
private val posixDoubleParser = FastDoubleParser(
303+
ParserOptions(locale = Locale.forLanguageTag("C.UTF-8")),
304+
)
305+
325306
internal val parsersOrder = listOf(
326307
// Int
327308
stringParser<Int> { it.toIntOrNull() },
@@ -384,7 +365,7 @@ internal object Parsers : GlobalParserOptions {
384365
// Double, with explicit number format or taken from current locale
385366
parserToDoubleWithOptions,
386367
// Double, with POSIX format
387-
stringParser<Double> { it.parseDouble(NumberFormat.getInstance(Locale.forLanguageTag("C.UTF-8"))) },
368+
stringParser<Double> { posixDoubleParser.parseOrNull(it) },
388369
// Boolean
389370
stringParser<Boolean> { it.toBooleanOrNull() },
390371
// BigDecimal
@@ -449,9 +430,9 @@ internal object Parsers : GlobalParserOptions {
449430
return parser.applyOptions(options)
450431
}
451432

452-
internal fun getDoubleParser(locale: Locale? = null): (String) -> Double? {
433+
internal fun getDoubleParser(locale: Locale? = null, useFastDoubleParser: Boolean): (String) -> Double? {
453434
val options = if (locale != null) {
454-
ParserOptions(locale = locale)
435+
ParserOptions(locale = locale, useFastDoubleParser = useFastDoubleParser)
455436
} else {
456437
null
457438
}

0 commit comments

Comments
 (0)