@@ -631,6 +631,35 @@ statement ok
631
631
drop table foo
632
632
633
633
634
+ # Tests for int96 timestamps written by spark
635
+ # See https://github.com/apache/datafusion/issues/9981
636
+
637
+ statement ok
638
+ CREATE EXTERNAL TABLE int96_from_spark
639
+ STORED AS PARQUET
640
+ LOCATION '../../parquet-testing/data/int96_from_spark.parquet';
641
+
642
+ # by default the value is read as nanosecond precision
643
+ query TTT
644
+ describe int96_from_spark
645
+ ----
646
+ a Timestamp(Nanosecond, None) YES
647
+
648
+ # Note that at nanosecond precision, values outside the representable range overflow (e.g. 9999-12-31 is read back as 1816-03-29)
649
+ query P
650
+ select * from int96_from_spark
651
+ ----
652
+ 2024-01-01T20:34:56.123456
653
+ 2024-01-01T01:00:00
654
+ 1816-03-29T08:56:08.066277376
655
+ 2024-12-30T23:00:00
656
+ NULL
657
+ 1815-11-08T16:01:01.191053312
658
+
659
+ statement ok
660
+ drop table int96_from_spark;
661
+
662
+ # Enable coercion of int96 to milliseconds
634
663
statement ok
635
664
set datafusion.execution.parquet.coerce_int96 = ms;
636
665
@@ -645,5 +674,33 @@ describe int96_from_spark;
645
674
----
646
675
a Timestamp(Millisecond, None) YES
647
676
677
+ # Per https://github.com/apache/parquet-testing/blob/6e851ddd768d6af741c7b15dc594874399fc3cff/data/int96_from_spark.md?plain=1#L37
678
+ # these values should be
679
+ #
680
+ # Some("2024-01-01T12:34:56.123456"),
681
+ # Some("2024-01-01T01:00:00Z"),
682
+ # Some("9999-12-31T01:00:00-02:00"),
683
+ # Some("2024-12-31T01:00:00+02:00"),
684
+ # None,
685
+ # Some("290000-12-31T01:00:00+02:00"))
686
+ #
687
+ # However, printing the largest date (290000-12-31) is not supported by
688
+ # arrow yet, so the final row is reported as a cast error
689
+ #
690
+ # See https://github.com/apache/arrow-rs/issues/7287
691
+ query P
692
+ select * from int96_from_spark
693
+ ----
694
+ 2024-01-01T20:34:56.123
695
+ 2024-01-01T01:00:00
696
+ 9999-12-31T03:00:00
697
+ 2024-12-30T23:00:00
698
+ NULL
699
+ ERROR: Cast error: Failed to convert -9357363680509551 to datetime for Timestamp(Millisecond, None)
700
+
701
+ # Cleanup / reset default setting
702
+ statement ok
703
+ drop table int96_from_spark;
704
+
648
705
statement ok
649
706
set datafusion.execution.parquet.coerce_int96 = ns;
0 commit comments