Skip to content

Commit cc65b72

Browse files
authored
Add slt tests for datafusion.execution.parquet.coerce_int96 setting (#15723)
* Add slt tests for datafusion.execution.parquet.coerce_int96 setting * tweak
1 parent d2a2a8b commit cc65b72

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed

datafusion/sqllogictest/test_files/parquet.slt

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,35 @@ statement ok
631631
drop table foo
632632

633633

634+
# Tests for int96 timestamps written by spark
635+
# See https://github.com/apache/datafusion/issues/9981
636+
637+
statement ok
638+
CREATE EXTERNAL TABLE int96_from_spark
639+
STORED AS PARQUET
640+
LOCATION '../../parquet-testing/data/int96_from_spark.parquet';
641+
642+
# by default the value is read as nanosecond precision
643+
query TTT
644+
describe int96_from_spark
645+
----
646+
a Timestamp(Nanosecond, None) YES
647+
648+
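# Note that the values are read as nanosecond precision, so the two dates that do not fit in a 64-bit nanosecond timestamp (9999-12-31 and 290000-12-31 in the source data) come back as incorrect, wrapped values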
649+
query P
650+
select * from int96_from_spark
651+
----
652+
2024-01-01T20:34:56.123456
653+
2024-01-01T01:00:00
654+
1816-03-29T08:56:08.066277376
655+
2024-12-30T23:00:00
656+
NULL
657+
1815-11-08T16:01:01.191053312
658+
659+
statement ok
660+
drop table int96_from_spark;
661+
662+
# Enable coercion of int96 to milliseconds
634663
statement ok
635664
set datafusion.execution.parquet.coerce_int96 = ms;
636665

@@ -645,5 +674,33 @@ describe int96_from_spark;
645674
----
646675
a Timestamp(Millisecond, None) YES
647676

677+
# Per https://github.com/apache/parquet-testing/blob/6e851ddd768d6af741c7b15dc594874399fc3cff/data/int96_from_spark.md?plain=1#L37
678+
# these values should be
679+
#
680+
# Some("2024-01-01T12:34:56.123456"),
681+
# Some("2024-01-01T01:00:00Z"),
682+
# Some("9999-12-31T01:00:00-02:00"),
683+
# Some("2024-12-31T01:00:00+02:00"),
684+
# None,
685+
# Some("290000-12-31T01:00:00+02:00"))
686+
#
687+
# However, printing the largest date (290000-12-31) is not supported by
688+
# arrow yet, so that row is reported as a cast error (9999-12-31 is printed as expected)
689+
#
690+
# See https://github.com/apache/arrow-rs/issues/7287
691+
query P
692+
select * from int96_from_spark
693+
----
694+
2024-01-01T20:34:56.123
695+
2024-01-01T01:00:00
696+
9999-12-31T03:00:00
697+
2024-12-30T23:00:00
698+
NULL
699+
ERROR: Cast error: Failed to convert -9357363680509551 to datetime for Timestamp(Millisecond, None)
700+
701+
# Cleanup / reset default setting
702+
statement ok
703+
drop table int96_from_spark;
704+
648705
statement ok
649706
set datafusion.execution.parquet.coerce_int96 = ns;

0 commit comments

Comments
 (0)