@@ -764,299 +764,6 @@ impl DataType {
764
764
}
765
765
}
766
766
767
- /// The [`Field`] metadata key whose value is the name identifying the extension type.
768
- pub const EXTENSION_TYPE_NAME_KEY : & str = "ARROW:extension:name" ;
769
-
770
- /// The [`Field`] metadata key whose value is the serialized metadata of the
771
- /// [`ExtensionType`], necessary to reconstruct the extension type.
772
- pub const EXTENSION_TYPE_METADATA_KEY : & str = "ARROW:extension:metadata" ;
773
-
774
- /// An extension type: custom semantics layered on top of a standard storage [`DataType`].
775
- ///
776
- /// <https://arrow.apache.org/docs/format/Columnar.html#extension-types>
777
- pub trait ExtensionType : Sized {
778
- /// The name of this extension type, compared against the value stored under [`EXTENSION_TYPE_NAME_KEY`].
779
- const NAME : & ' static str ;
780
-
781
- /// The supported storage types of this extension type.
782
- fn storage_types ( & self ) -> & [ DataType ] ;
783
-
784
- /// The metadata type of this extension type.
785
- type Metadata ;
786
-
787
- /// Returns a reference to the metadata of this extension type, or `None`
788
- /// if this extension type has no metadata.
789
- fn metadata ( & self ) -> Option < & Self :: Metadata > ;
790
-
791
- /// Returns the serialized representation of the metadata of this extension
792
- /// type, or `None` if this extension type has no metadata.
793
- fn serialized_metadata ( & self ) -> Option < String > ;
794
-
795
- /// Deserialize this extension type from the serialized representation of the
796
- /// metadata of this extension. An extension type that has no metadata should
797
- /// expect `None` for the serialized metadata. Returns `None` if the input is not valid for this type.
798
- fn from_serialized_metadata ( serialized_metadata : Option < & str > ) -> Option < Self > ;
799
- }
800
-
801
- pub ( crate ) trait ExtensionTypeExt : ExtensionType {
801
- /// Returns `true` if the given data type is one of the storage types of this
802
- /// extension type (see [`ExtensionType::storage_types`]).
803
- fn supports ( & self , data_type : & DataType ) -> bool {
804
- self . storage_types ( ) . contains ( data_type)
805
- }
806
-
807
- /// Try to extract this extension type from the given [`Field`].
808
- ///
809
- /// This function returns `None` if
810
- /// - the extension type name is missing from the field metadata
811
- /// - the name does not match [`ExtensionType::NAME`]
812
- /// - metadata deserialization failed
813
- /// - the extension type does not support the data type of this field
814
- fn try_from_field ( field : & Field ) -> Option < Self > {
815
- field
816
- . metadata ( )
817
- . get ( EXTENSION_TYPE_NAME_KEY )
818
- . and_then ( |name| {
819
- ( name == <Self as ExtensionType >:: NAME )
820
- . then ( || {
821
- Self :: from_serialized_metadata (
822
- field
823
- . metadata ( )
824
- . get ( EXTENSION_TYPE_METADATA_KEY )
825
- . map ( String :: as_str) , // an absent metadata key is passed through as `None`
826
- )
827
- } )
828
- . flatten ( ) // Option<Option<Self>> -> Option<Self>
829
- } )
830
- . filter ( |extension_type| extension_type. supports ( field. data_type ( ) ) )
831
- }
832
- }
834
-
835
- impl < T > ExtensionTypeExt for T where T : ExtensionType { } // blanket impl: every `ExtensionType` gets the `ExtensionTypeExt` helpers
836
-
837
- /// Canonical extension types.
837
- ///
838
- /// The Arrow columnar format allows defining extension types so as to extend
839
- /// standard Arrow data types with custom semantics. Often these semantics will
840
- /// be specific to a system or application. However, it is beneficial to share
841
- /// the definitions of well-known extension types so as to improve
842
- /// interoperability between different systems integrating Arrow columnar data.
843
- pub mod canonical_extension_types {
844
- use serde_json:: Value ;
845
-
846
- use super :: { DataType , ExtensionType } ;
847
-
848
- /// An enum over the canonical extension types defined in this module.
849
- #[ non_exhaustive]
850
- #[ derive( Debug , Clone , PartialEq ) ]
851
- pub enum CanonicalExtensionTypes {
852
- /// The extension type for `JSON`.
853
- Json ( Json ) ,
854
- /// The extension type for `UUID`.
855
- Uuid ( Uuid ) ,
856
- }
857
-
858
- impl From < Json > for CanonicalExtensionTypes {
859
- fn from ( value : Json ) -> Self {
860
- CanonicalExtensionTypes :: Json ( value)
861
- }
862
- }
863
-
864
- impl From < Uuid > for CanonicalExtensionTypes {
865
- fn from ( value : Uuid ) -> Self {
866
- CanonicalExtensionTypes :: Uuid ( value)
867
- }
868
- }
869
-
870
- /// The extension type for `JSON`.
871
- ///
872
- /// Extension name: `arrow.json`.
873
- ///
874
- /// The storage type of this extension is `String`, `LargeString` or `StringView`
875
- /// (`DataType::Utf8`, `LargeUtf8` or `Utf8View`). Only UTF-8 encoded JSON as specified in [rfc8259](https://datatracker.ietf.org/doc/html/rfc8259)
876
- /// is supported.
877
- ///
878
- /// This type does not have any parameters.
879
- ///
880
- /// Metadata is either an empty string or a JSON string with an empty
881
- /// object. In the future, additional fields may be added, but they are not
882
- /// required to interpret the array.
883
- ///
884
- /// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#json>
885
- #[ derive( Debug , Clone , PartialEq ) ]
886
- pub struct Json ( Value ) ;
887
-
888
- impl Default for Json {
889
- fn default ( ) -> Self {
890
- Self ( Value :: String ( "" . to_owned ( ) ) ) // the JSON empty-string value
891
- }
892
- }
893
-
894
- impl ExtensionType for Json {
895
- const NAME : & ' static str = "arrow.json" ;
896
-
897
- type Metadata = Value ;
898
-
899
- fn storage_types ( & self ) -> & [ DataType ] {
900
- & [ DataType :: Utf8 , DataType :: LargeUtf8 , DataType :: Utf8View ]
901
- }
902
-
903
- fn metadata ( & self ) -> Option < & Self :: Metadata > {
904
- Some ( & self . 0 )
905
- }
906
-
907
- fn serialized_metadata ( & self ) -> Option < String > {
908
- Some ( self . 0 . to_string ( ) )
909
- }
910
-
911
- fn from_serialized_metadata ( serialized_metadata : Option < & str > ) -> Option < Self > {
912
- serialized_metadata. and_then ( |metadata| match metadata {
913
- // JSON-encoded empty string, i.e. the two quote characters; a zero-length string is not matched by this arm
914
- r#""""# => Some ( Default :: default ( ) ) ,
915
- // Anything else must parse as JSON and be an empty object
916
- value => value
917
- . parse :: < Value > ( )
918
- . ok ( )
919
- . filter ( |value| matches ! ( value. as_object( ) , Some ( map) if map. is_empty( ) ) )
920
- . map ( Self ) ,
921
- } )
922
- }
923
- }
924
-
925
- /// The extension type for `UUID`.
926
- ///
927
- /// Extension name: `arrow.uuid`.
928
- ///
929
- /// The storage type of the extension is `FixedSizeBinary` with a length of
930
- /// 16 bytes.
931
- ///
932
- /// Note:
933
- /// A specific UUID version is not required or guaranteed. This extension
934
- /// represents UUIDs as `FixedSizeBinary(16)` with big-endian notation and
935
- /// does not interpret the bytes in any way.
936
- ///
937
- /// <https://arrow.apache.org/docs/format/CanonicalExtensions.html#uuid>
938
- #[ derive( Debug , Default , Clone , Copy , PartialEq ) ]
939
- pub struct Uuid ;
940
-
941
- impl ExtensionType for Uuid {
942
- const NAME : & ' static str = "arrow.uuid" ;
943
-
944
- type Metadata = ( ) ;
945
-
946
- fn storage_types ( & self ) -> & [ DataType ] {
947
- & [ DataType :: FixedSizeBinary ( 16 ) ]
948
- }
949
-
950
- fn metadata ( & self ) -> Option < & Self :: Metadata > {
951
- None
952
- }
953
-
954
- fn serialized_metadata ( & self ) -> Option < String > {
955
- None
956
- }
957
-
958
- fn from_serialized_metadata ( serialized_metadata : Option < & str > ) -> Option < Self > {
959
- serialized_metadata. is_none ( ) . then_some ( Self ) // any metadata at all, even an empty string, is rejected
960
- }
961
- }
962
-
963
- #[ cfg( test) ]
964
- mod tests {
965
- use std:: collections:: HashMap ;
966
-
967
- use serde_json:: Map ;
968
-
969
- use crate :: { ArrowError , Field , EXTENSION_TYPE_METADATA_KEY , EXTENSION_TYPE_NAME_KEY } ;
970
-
971
- use super :: * ;
972
-
973
- #[ test]
974
- fn json ( ) -> Result < ( ) , ArrowError > {
975
- let mut field = Field :: new ( "" , DataType :: Utf8 , false ) ;
976
- field. try_with_extension_type ( Json :: default ( ) ) ?;
977
- assert_eq ! (
978
- field. metadata( ) . get( EXTENSION_TYPE_METADATA_KEY ) ,
979
- Some ( & r#""""# . to_owned( ) )
980
- ) ;
981
- assert ! ( field. extension_type:: <Json >( ) . is_some( ) ) ;
982
-
983
- let mut field = Field :: new ( "" , DataType :: LargeUtf8 , false ) ;
984
- field. try_with_extension_type ( Json ( serde_json:: Value :: Object ( Map :: default ( ) ) ) ) ?;
985
- assert_eq ! (
986
- field. metadata( ) . get( EXTENSION_TYPE_METADATA_KEY ) ,
987
- Some ( & "{}" . to_owned( ) )
988
- ) ;
989
- assert ! ( field. extension_type:: <Json >( ) . is_some( ) ) ;
990
-
991
- let mut field = Field :: new ( "" , DataType :: Utf8View , false ) ;
992
- field. try_with_extension_type ( Json :: default ( ) ) ?;
993
- assert ! ( field. extension_type:: <Json >( ) . is_some( ) ) ;
994
- assert_eq ! (
995
- field. canonical_extension_type( ) ,
996
- Some ( CanonicalExtensionTypes :: Json ( Json :: default ( ) ) )
997
- ) ;
998
- Ok ( ( ) )
999
- }
1000
-
1001
- #[ test]
1002
- #[ should_panic( expected = "expected Utf8 or LargeUtf8 or Utf8View, found Boolean" ) ]
1003
- fn json_bad_type ( ) {
1004
- Field :: new ( "" , DataType :: Boolean , false ) . with_extension_type ( Json :: default ( ) ) ;
1005
- }
1006
-
1007
- #[ test]
1008
- fn json_bad_metadata ( ) {
1009
- let field = Field :: new ( "" , DataType :: Utf8 , false ) . with_metadata ( HashMap :: from_iter ( [
1010
- ( EXTENSION_TYPE_NAME_KEY . to_owned ( ) , Json :: NAME . to_owned ( ) ) ,
1011
- ( EXTENSION_TYPE_METADATA_KEY . to_owned ( ) , "1234" . to_owned ( ) ) ,
1012
- ] ) ) ;
1013
- // `1234` is neither the empty-string form nor an empty JSON object, so extraction returns `None`.
1014
- assert ! ( field. extension_type:: <Json >( ) . is_none( ) ) ;
1015
- }
1016
-
1017
- #[ test]
1018
- fn json_missing_metadata ( ) {
1019
- let field = Field :: new ( "" , DataType :: LargeUtf8 , false ) . with_metadata (
1020
- HashMap :: from_iter ( [ ( EXTENSION_TYPE_NAME_KEY . to_owned ( ) , Json :: NAME . to_owned ( ) ) ] ) ,
1021
- ) ;
1022
- // Only the name key is set; `Json` requires the metadata key too, so extraction returns `None`.
1023
- assert ! ( field. extension_type:: <Json >( ) . is_none( ) ) ;
1024
- }
1025
-
1026
- #[ test]
1027
- fn uuid ( ) -> Result < ( ) , ArrowError > {
1028
- let mut field = Field :: new ( "" , DataType :: FixedSizeBinary ( 16 ) , false ) ;
1029
- field. try_with_extension_type ( Uuid ) ?;
1030
- assert ! ( field. extension_type:: <Uuid >( ) . is_some( ) ) ;
1031
- assert_eq ! (
1032
- field. canonical_extension_type( ) ,
1033
- Some ( CanonicalExtensionTypes :: Uuid ( Uuid ) )
1034
- ) ;
1035
- Ok ( ( ) )
1036
- }
1037
-
1038
- #[ test]
1039
- #[ should_panic( expected = "expected FixedSizeBinary(16), found FixedSizeBinary(8)" ) ]
1040
- fn uuid_bad_type ( ) {
1041
- Field :: new ( "" , DataType :: FixedSizeBinary ( 8 ) , false ) . with_extension_type ( Uuid ) ;
1042
- }
1043
-
1044
- #[ test]
1045
- fn uuid_with_metadata ( ) {
1046
- // Attach a metadata entry (an empty string) that `Uuid` does not expect.
1047
- let field = Field :: new ( "" , DataType :: FixedSizeBinary ( 16 ) , false )
1048
- . with_metadata ( HashMap :: from_iter ( [ (
1049
- EXTENSION_TYPE_METADATA_KEY . to_owned ( ) ,
1050
- "" . to_owned ( ) ,
1051
- ) ] ) )
1052
- . with_extension_type ( Uuid ) ;
1053
- // This returns `None` because `Uuid` only deserializes when no metadata is present.
1054
- assert ! ( field. extension_type:: <Uuid >( ) . is_none( ) ) ;
1055
- }
1056
- }
1057
- }
1059
-
1060
767
/// The maximum precision for [`DataType::Decimal128`] values.
1061
768
pub const DECIMAL128_MAX_PRECISION : u8 = 38 ;
1062
769
0 commit comments