diff --git a/Cargo.lock b/Cargo.lock index 76d795432..1526b88c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -105,6 +105,56 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +dependencies = [ + "anstyle", + "once_cell", + "windows-sys 0.59.0", +] + [[package]] name = "anyhow" version = "1.0.95" @@ -118,6 +168,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" dependencies = [ "bigdecimal", + "bzip2 0.4.4", + "crc32fast", "digest", "libflate", "log", @@ -128,11 +180,14 @@ dependencies = [ "serde", "serde_bytes", "serde_json", + "snap", "strum", "strum_macros", "thiserror 1.0.69", "typed-builder 0.19.1", "uuid", + "xz2", + "zstd", ] [[package]] @@ -1330,6 +1385,61 @@ dependencies 
= [ "inout", ] +[[package]] +name = "clap" +version = "4.5.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8aa86934b44c19c50f87cc2790e19f54f7a67aedb64101c2e1a2e5ecfb73944" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2414dbb2dd0695280da6ea9261e327479e9d37b0630f6b53ba2a11c60c679fd9" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "clipboard-win" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15efe7a882b08f34e38556b14f2fb3daa98769d06c7f0c1b076dfd0d983bc892" +dependencies = [ + "error-code", +] + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + [[package]] name = "colored" version = "2.2.0" @@ -1401,6 +1511,16 @@ dependencies = [ "libc", ] +[[package]] +name = "core-foundation" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -1609,6 +1729,7 @@ version = "45.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" dependencies = [ + "apache-avro", "arrow", "arrow-array", "arrow-ipc", @@ -1639,6 +1760,7 @@ dependencies = [ "glob", "itertools 0.14.0", "log", + "num-traits", "object_store", "parking_lot", "parquet", @@ -1675,6 +1797,36 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "datafusion-cli" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36aa3b55844f279df97ae25c5300e1518b4a3aff60f9b95c8acc3a91bf330250" +dependencies = [ + "arrow", + "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "clap", + "datafusion", + "datafusion-catalog", + "dirs", + "env_logger", + "futures", + "home", + "mimalloc", + "object_store", + "parking_lot", + "parquet", + "regex", + "rustyline", + "tokio", + "url", +] + [[package]] name = "datafusion-common" version = "45.0.0" @@ -1682,6 +1834,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" dependencies = [ "ahash 0.8.11", + "apache-avro", "arrow", "arrow-array", "arrow-buffer", @@ -2133,6 +2286,27 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.59.0", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -2174,6 +2348,35 @@ dependencies = [ "serde", ] +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -2190,6 +2393,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "error-code" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d9305ccc6942a704f4335694ecd3de2ea531b114ac2d51f5f843750787a92f" + [[package]] name = "etcetera" version = "0.8.0" @@ -2256,6 +2465,17 @@ dependencies = [ "simdutf8", ] +[[package]] +name = "fd-lock" +version = "4.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" +dependencies = [ + "cfg-if", + "rustix 1.0.1", + "windows-sys 0.59.0", +] + [[package]] name = "fixedbitset" version = "0.5.7" @@ -2819,7 +3039,7 @@ dependencies = [ "hyper 0.14.32", "log", "rustls 0.21.12", - "rustls-native-certs", + "rustls-native-certs 0.6.3", "tokio", "tokio-rustls 0.24.1", ] @@ -2835,6 +3055,7 @@ dependencies = [ "hyper 1.6.0", "hyper-util", "rustls 0.23.23", + "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", "tokio-rustls 0.26.1", @@ -3053,6 +3274,23 @@ dependencies = [ "uuid", ] +[[package]] +name = "iceberg-cli" +version = "0.4.0" +dependencies = [ + "anyhow", + "clap", + "datafusion", + "datafusion-cli", + "dirs", + "iceberg-catalog-rest", + "iceberg-datafusion", + "tokio", + "toml", + "tracing", + "tracing-subscriber", +] + [[package]] name = 
"iceberg-datafusion" version = "0.4.0" @@ -3319,6 +3557,12 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + [[package]] name = "itertools" version = "0.13.0" @@ -3343,6 +3587,30 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +[[package]] +name = "jiff" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f33145a5cbea837164362c7bd596106eb7c5198f97d1ba6f6ebb3223952e488" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43ce13c40ec6956157a3635d97a1ee2df323b263f09ea14165131289cb0f5c19" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "jobserver" version = "0.1.32" @@ -3495,6 +3763,26 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +[[package]] +name = "libmimalloc-sys" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec9d6fac27761dabcd4ee73571cdb06b7022dc99089acbe5435691edffaac0f4" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.8.0", + "libc", +] + [[package]] name = "libsqlite3-sys" 
version = "0.30.1" @@ -3649,6 +3937,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "mimalloc" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "995942f432bbb4822a7e9c3faa87a695185b0d09273ba85f097b54f4e458f2af" +dependencies = [ + "libmimalloc-sys", +] + [[package]] name = "mime" version = "0.3.17" @@ -3776,6 +4073,15 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9252111cf132ba0929b6f8e030cac2a24b507f3a4d6db6fb2896f27b354c714b" +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + [[package]] name = "nix" version = "0.29.0" @@ -3944,13 +4250,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" dependencies = [ "async-trait", + "base64 0.22.1", "bytes", "chrono", "futures", "humantime", + "hyper 1.6.0", "itertools 0.13.0", + "md-5", "parking_lot", "percent-encoding", + "quick-xml 0.37.4", + "rand 0.8.5", + "reqwest", + "ring", + "rustls-pemfile 2.2.0", + "serde", + "serde_json", "snafu", "tokio", "tracing", @@ -3998,6 +4314,12 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-float" version = "2.10.1" @@ -4386,6 +4708,15 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -4504,6 +4835,16 @@ dependencies = [ "serde", ] +[[package]] +name = "quick-xml" +version = "0.37.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ce8c88de324ff838700f36fb6ab86c96df0e3c4ab6ef3a9b2044465cce1369" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quinn" version = "0.11.6" @@ -4571,6 +4912,16 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + [[package]] name = "rancor" version = "0.1.0" @@ -4672,6 +5023,17 @@ dependencies = [ "bitflags 2.8.0", ] +[[package]] +name = "redox_users" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +dependencies = [ + "getrandom 0.2.15", + "libredox", + "thiserror 2.0.11", +] + [[package]] name = "ref-cast" version = "1.0.23" @@ -4798,6 +5160,7 @@ dependencies = [ "bytes", "futures-core", "futures-util", + "h2 0.4.7", "http 1.3.1", "http-body 1.0.1", "http-body-util", @@ -4813,6 +5176,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls 0.23.23", + "rustls-native-certs 0.8.1", "rustls-pemfile 2.2.0", "rustls-pki-types", "serde", @@ -5053,7 +5417,19 @@ dependencies = [ "openssl-probe", "rustls-pemfile 1.0.4", "schannel", - "security-framework", + "security-framework 2.11.1", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework 3.2.0", ] [[package]] @@ -5110,6 +5486,28 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" +[[package]] +name = "rustyline" +version = "15.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ee1e066dc922e513bda599c6ccb5f3bb2b0ea5870a579448f2622993f0a9a2f" +dependencies = [ + "bitflags 2.8.0", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix", + "radix_trie", + "unicode-segmentation", + "unicode-width", + "utf8parse", + "windows-sys 0.59.0", +] + [[package]] name = "ryu" version = "1.0.19" @@ -5189,7 +5587,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags 2.8.0", - "core-foundation", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" +dependencies = [ + "bitflags 2.8.0", + "core-foundation 0.10.0", "core-foundation-sys", "libc", "security-framework-sys", @@ -5269,6 +5680,15 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "serde_spanned" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -6100,11 +6520,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = 
"0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + [[package]] name = "toml_datetime" version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +dependencies = [ + "serde", +] [[package]] name = "toml_edit" @@ -6113,6 +6548,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" dependencies = [ "indexmap 2.7.1", + "serde", + "serde_spanned", "toml_datetime", "winnow", ] @@ -6404,6 +6841,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "uuid" version = "1.16.0" diff --git a/Cargo.toml b/Cargo.toml index 7c4f84ca0..9c082408b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,24 +52,27 @@ arrow-select = { version = "54.2.0" } arrow-string = { version = "54.2.0" } async-std = "1.12" async-trait = "0.1.86" -aws-config = "1" +aws-config = "1.5" aws-sdk-glue = "1.39" bimap = "0.6" bitvec = "1.0.1" bytes = "1.6" chrono = "0.4.38" ctor = "0.2.8" +clap = { version = "4.5.32", features = ["derive", "cargo"] } datafusion = "45" +datafusion-cli = "45" derive_builder = "0.20" expect-test = "1" +dirs = "6.0.0" fnv = "1.0.7" futures = "0.3" hive_metastore = "0.1" http = "1.1" -iceberg = { version = "0.4.0", path = "./crates/iceberg" } -iceberg-catalog-memory = { version = "0.4.0", path = "./crates/catalog/memory" } -iceberg-catalog-rest = { version = "0.4.0", path = 
"./crates/catalog/rest" } -iceberg-datafusion = { version = "0.4.0", path = "./crates/integrations/datafusion" } +iceberg = { path = "./crates/iceberg" } +iceberg-catalog-memory = { path = "./crates/catalog/memory" } +iceberg-catalog-rest = { path = "./crates/catalog/rest" } +iceberg-datafusion = { path = "./crates/integrations/datafusion" } itertools = "0.13" mockito = "1" murmur3 = "0.5.2" @@ -99,6 +102,7 @@ tokio = { version = "1.44", default-features = false } tracing = "0.1.37" tracing-subscriber = "0.3.8" typed-builder = "0.20" +toml = "0.8.20" url = "2.5.4" uuid = { version = "1.14", features = ["v7"] } volo-thrift = "0.10" diff --git a/crates/integrations/cli/Cargo.toml b/crates/integrations/cli/Cargo.toml new file mode 100644 index 000000000..48c27383d --- /dev/null +++ b/crates/integrations/cli/Cargo.toml @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "iceberg-cli" +description = "Apache iceberg client" +version.workspace = true +edition.workspace = true +homepage.workspace = true +repository.workspace = true +license.workspace = true +rust-version.workspace = true +readme = "README.md" + +[dependencies] +clap = {workspace = true} +datafusion-cli = {workspace = true} +datafusion = {workspace = true} +tokio = {workspace = true} +anyhow = {workspace = true} +iceberg-datafusion = {workspace = true} +toml = {workspace = true} +iceberg-catalog-rest = {workspace = true} +tracing = {workspace = true} +tracing-subscriber = {workspace = true} +dirs = {workspace = true} \ No newline at end of file diff --git a/crates/integrations/cli/README.md b/crates/integrations/cli/README.md new file mode 100644 index 000000000..42d806af5 --- /dev/null +++ b/crates/integrations/cli/README.md @@ -0,0 +1,25 @@ + + + +# Introduction + +Iceberg CLI (`iceberg-cli`) is a small command-line utility that runs SQL queries against Iceberg tables. +It is backed by the DataFusion query engine. + diff --git a/crates/integrations/cli/src/catalog.rs b/crates/integrations/cli/src/catalog.rs new file mode 100644 index 000000000..a1302c030 --- /dev/null +++ b/crates/integrations/cli/src/catalog.rs @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::collections::HashMap;
+use std::fs::read_to_string;
+use std::path::Path;
+use std::sync::Arc;
+
+use anyhow::{anyhow, Context};
+use datafusion::catalog::{CatalogProvider, CatalogProviderList};
+use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
+use iceberg_datafusion::IcebergCatalogProvider;
+use toml::{Table as TomlTable, Value};
+
+/// Top-level config key that holds the array of catalog definitions.
+const CONFIG_NAME_CATALOGS: &str = "catalogs";
+
+/// Immutable set of Iceberg catalogs built from a TOML config and exposed to DataFusion.
+#[derive(Debug)]
+pub struct IcebergCatalogList {
+    catalogs: HashMap<String, Arc<IcebergCatalogProvider>>,
+}
+
+impl IcebergCatalogList {
+    /// Reads the TOML file at `path` and builds every catalog listed under `catalogs`.
+    /// Fails if the file cannot be read or parsed, or if a catalog entry is invalid.
+    pub async fn parse(path: &Path) -> anyhow::Result<Self> {
+        let content = read_to_string(path)
+            .with_context(|| format!("failed to read catalog config {}", path.display()))?;
+        let toml_table: TomlTable = toml::from_str(&content)
+            .with_context(|| format!("failed to parse catalog config {}", path.display()))?;
+        Self::parse_table(&toml_table).await
+    }
+
+    /// Builds the catalog list from a parsed TOML document whose `catalogs` key holds an
+    /// array of tables, one per catalog. If a name is repeated, the last definition is
+    /// kept and a warning is logged.
+    pub async fn parse_table(configs: &TomlTable) -> anyhow::Result<Self> {
+        let catalogs_config = match configs.get(CONFIG_NAME_CATALOGS) {
+            Some(Value::Array(entries)) => entries,
+            Some(_) => return Err(anyhow!("{CONFIG_NAME_CATALOGS} must be an array of table!")),
+            None => return Err(anyhow!("{CONFIG_NAME_CATALOGS} entry not found in config")),
+        };
+
+        let mut catalogs = HashMap::with_capacity(catalogs_config.len());
+        for config in catalogs_config {
+            let table_config = config
+                .as_table()
+                .ok_or_else(|| anyhow!("{CONFIG_NAME_CATALOGS} entry must be a table"))?;
+            let (name, catalog_provider) = Self::parse_one(table_config).await?;
+            if catalogs.contains_key(&name) {
+                tracing::warn!("catalog {name} is defined more than once, using the last one");
+            }
+            catalogs.insert(name, catalog_provider);
+        }
+        Ok(Self { catalogs })
+    }
+
+    /// Builds one REST catalog provider from a single entry of the `catalogs` array.
+    ///
+    /// The entry needs `name`, `type` (only `rest` is accepted) and a `config` table with
+    /// `uri` and `warehouse`; `config.props` is an optional table of string values.
+    async fn parse_one(
+        config: &TomlTable,
+    ) -> anyhow::Result<(String, Arc<IcebergCatalogProvider>)> {
+        let name = required_str(config, "name", "catalog")?;
+        let owner = format!("catalog {name}");
+
+        let r#type = required_str(config, "type", &owner)?;
+        if r#type != "rest" {
+            return Err(anyhow!("Only rest catalog is supported for now!"));
+        }
+
+        let catalog_config = config
+            .get("config")
+            .ok_or_else(|| anyhow!("config not found for {owner}"))?
+            .as_table()
+            .ok_or_else(|| anyhow!("config is not table for {owner}"))?;
+        let uri = required_str(catalog_config, "uri", &owner)?;
+        let warehouse = required_str(catalog_config, "warehouse", &owner)?;
+
+        // `props` may be omitted; when present, every value must be a string.
+        let mut props = HashMap::new();
+        if let Some(raw_props) = catalog_config.get("props") {
+            let props_table = raw_props
+                .as_table()
+                .ok_or_else(|| anyhow!("props is not table for {owner}"))?;
+            props.reserve(props_table.len());
+            for (key, value) in props_table {
+                let value_str = value
+                    .as_str()
+                    .ok_or_else(|| anyhow!("props {key} is not string for {owner}"))?;
+                props.insert(key.to_string(), value_str.to_string());
+            }
+        }
+
+        let rest_catalog_config = RestCatalogConfig::builder()
+            .uri(uri.to_string())
+            .warehouse(warehouse.to_string())
+            .props(props)
+            .build();
+        let catalog = Arc::new(RestCatalog::new(rest_catalog_config));
+        let provider = IcebergCatalogProvider::try_new(catalog).await?;
+        Ok((name.to_string(), Arc::new(provider)))
+    }
+}
+
+/// Returns the string under `key`; the error names `owner` if it is missing or not a string.
+fn required_str<'a>(table: &'a TomlTable, key: &str, owner: &str) -> anyhow::Result<&'a str> {
+    table
+        .get(key)
+        .ok_or_else(|| anyhow!("{key} not found for {owner}"))?
+        .as_str()
+        .ok_or_else(|| anyhow!("{key} is not string for {owner}"))
+}
+
+impl CatalogProviderList for IcebergCatalogList {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Unsupported: the list cannot change after construction, so the request is only logged.
+    fn register_catalog(
+        &self,
+        _name: String,
+        _catalog: Arc<dyn CatalogProvider>,
+    ) -> Option<Arc<dyn CatalogProvider>> {
+        tracing::error!("Registering catalog is not supported yet");
+        None
+    }
+
+    fn catalog_names(&self) -> Vec<String> {
+        self.catalogs.keys().cloned().collect()
+    }
+
+    fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>> {
+        let provider = self.catalogs.get(name)?;
+        Some(provider.clone() as Arc<dyn CatalogProvider>)
+    }
+}
diff --git a/crates/integrations/cli/src/lib.rs b/crates/integrations/cli/src/lib.rs
new file mode 100644
index 000000000..70adf6caf
--- /dev/null
+++ b/crates/integrations/cli/src/lib.rs
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#![doc = include_str!("../README.md")]
+pub const ICEBERG_CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
+
+mod catalog;
+pub use catalog::*;
diff --git a/crates/integrations/cli/src/main.rs b/crates/integrations/cli/src/main.rs
new file mode 100644
index 000000000..d21edb4b8
--- /dev/null
+++ b/crates/integrations/cli/src/main.rs
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::path::PathBuf;
+use std::process::ExitCode;
+use std::str::FromStr;
+use std::sync::Arc;
+
+use clap::Parser;
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use datafusion::prelude::{SessionConfig, SessionContext};
+use datafusion_cli::exec;
+use datafusion_cli::print_format::PrintFormat;
+use datafusion_cli::print_options::{MaxRows, PrintOptions};
+use iceberg_cli::{IcebergCatalogList, ICEBERG_CLI_VERSION};
+
+/// Command line arguments accepted by `iceberg-cli`.
+#[derive(Debug, Parser, PartialEq)]
+#[clap(author, version, about, long_about= None)]
+struct Args {
+    #[clap(
+        short = 'r',
+        long,
+        help = "Parse catalog config instead of using ~/.icebergrc"
+    )]
+    rc: Option<String>,
+
+    #[clap(long, value_enum, default_value_t = PrintFormat::Automatic)]
+    format: PrintFormat,
+
+    #[clap(
+        short,
+        long,
+        help = "Reduce printing other than the results and work quietly"
+    )]
+    quiet: bool,
+
+    #[clap(
+        long,
+        help = "The max number of rows to display for 'Table' format\n[possible values: numbers(0/10/...), inf(no limit)]",
+        default_value = "40"
+    )]
+    maxrows: MaxRows,
+
+    #[clap(long, help = "Enables console syntax highlighting")]
+    color: bool,
+}
+
+/// Calls [`main_inner`]; on failure prints the error and returns `ExitCode::FAILURE`.
+#[tokio::main]
+pub async fn main() -> ExitCode {
+    tracing_subscriber::fmt::init();
+
+    if let Err(e) = main_inner().await {
+        // Report on stderr, with the full cause chain, so errors never mix with query output.
+        eprintln!("Error: {e:#}");
+        return ExitCode::FAILURE;
+    }
+
+    ExitCode::SUCCESS
+}
+
+/// Parses the arguments, registers the configured Iceberg catalogs and runs the SQL REPL.
+async fn main_inner() -> anyhow::Result<()> {
+    let args = Args::parse();
+
+    if !args.quiet {
+        println!("ICEBERG CLI v{ICEBERG_CLI_VERSION}");
+    }
+
+    let session_config = SessionConfig::from_env()?.with_information_schema(true);
+    let runtime_env = RuntimeEnvBuilder::new().build_arc()?;
+
+    // enable dynamic file query
+    let ctx = SessionContext::new_with_config_rt(session_config, runtime_env).enable_url_table();
+    ctx.refresh_catalogs().await?;
+
+    let mut print_options = PrintOptions {
+        format: args.format,
+        quiet: args.quiet,
+        maxrows: args.maxrows,
+        color: args.color,
+    };
+
+    let rc = match args.rc {
+        Some(file) => PathBuf::from_str(&file)?,
+        None => dirs::home_dir()
+            .map(|h| h.join(".icebergrc"))
+            .ok_or_else(|| anyhow::anyhow!("cannot find home directory"))?,
+    };
+
+    let catalogs = Arc::new(IcebergCatalogList::parse(&rc).await?);
+    ctx.register_catalog_list(catalogs);
+
+    Ok(exec::exec_from_repl(&ctx, &mut print_options).await?)
+}