Skip to content

Commit 3a947fa

Browse files
Add hive metastore catalog support (part 1/2) (#237)
* fmt members * setup basic test-infra for hms-catalog * add license * add hms create_namespace * add hms get_namespace * fix: typo * add hms namespace_exists and drop_namespace * add hms update_namespace * move fns into HmsCatalog * use `expose` in docker-compose * add hms list_tables * fix: clippy * fix: cargo sort * fix: cargo workspace * move fns into utils + add constants * include database name in error msg * add pilota to cargo workspace * add minio version * change visibility to pub(crate); return namespace from conversion fn * add minio version in rest-catalog docker-compose * fix: hms test docker infrastructure * add version to minio/mc * fix: license header * fix: core-site --------- Co-authored-by: mlanhenke <[email protected]>
1 parent 0914f7a commit 3a947fa

File tree

12 files changed

+825
-41
lines changed

12 files changed

+825
-41
lines changed

.cargo/audit.toml

+4-4
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
[advisories]
1919
ignore = [
20-
# rsa
21-
# Marvin Attack: potential key recovery through timing sidechannels
22-
# Issues: https://github.com/apache/iceberg-rust/issues/221
23-
"RUSTSEC-2023-0071",
20+
# rsa
21+
# Marvin Attack: potential key recovery through timing sidechannels
22+
# Issues: https://github.com/apache/iceberg-rust/issues/221
23+
"RUSTSEC-2023-0071",
2424
]

Cargo.toml

+7-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,12 @@
1717

1818
[workspace]
1919
resolver = "2"
20-
members = ["crates/catalog/*", "crates/examples", "crates/iceberg", "crates/test_utils"]
20+
members = [
21+
"crates/catalog/*",
22+
"crates/examples",
23+
"crates/iceberg",
24+
"crates/test_utils",
25+
]
2126

2227
[workspace.package]
2328
version = "0.2.0"
@@ -55,6 +60,7 @@ once_cell = "1"
5560
opendal = "0.45"
5661
ordered-float = "4.0.0"
5762
parquet = "50"
63+
pilota = "0.10.0"
5864
pretty_assertions = "1.4.0"
5965
port_scanner = "0.1.5"
6066
reqwest = { version = "^0.11", features = ["json"] }

crates/catalog/hms/Cargo.toml

+7
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,12 @@ anyhow = { workspace = true }
3333
async-trait = { workspace = true }
3434
hive_metastore = { workspace = true }
3535
iceberg = { workspace = true }
36+
log = { workspace = true }
37+
pilota = { workspace = true }
3638
typed-builder = { workspace = true }
3739
volo-thrift = { workspace = true }
40+
41+
[dev-dependencies]
42+
iceberg_test_utils = { path = "../../test_utils", features = ["tests"] }
43+
port_scanner = { workspace = true }
44+
tokio = { workspace = true }

crates/catalog/hms/src/catalog.rs

+145-15
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use super::utils::*;
1919
use async_trait::async_trait;
2020
use hive_metastore::ThriftHiveMetastoreClient;
2121
use hive_metastore::ThriftHiveMetastoreClientBuilder;
22+
use hive_metastore::ThriftHiveMetastoreGetDatabaseException;
2223
use iceberg::table::Table;
2324
use iceberg::{
2425
Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation,
@@ -28,6 +29,7 @@ use std::collections::HashMap;
2829
use std::fmt::{Debug, Formatter};
2930
use std::net::ToSocketAddrs;
3031
use typed_builder::TypedBuilder;
32+
use volo_thrift::ResponseError;
3133

3234
/// Which variant of the thrift transport to communicate with HMS
3335
/// See: <https://github.com/apache/thrift/blob/master/doc/specs/thrift-rpc.md#framed-vs-unframed-transport>
@@ -97,7 +99,6 @@ impl HmsCatalog {
9799
}
98100
}
99101

100-
/// Refer to <https://github.com/apache/iceberg/blob/main/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java> for implementation details.
101102
#[async_trait]
102103
impl Catalog for HmsCatalog {
103104
/// HMS doesn't support nested namespaces.
@@ -125,36 +126,165 @@ impl Catalog for HmsCatalog {
125126
.collect())
126127
}
127128

129+
/// Creates a new namespace with the given identifier and properties.
130+
///
131+
/// Attempts to create a namespace defined by the `namespace`
132+
/// parameter and configured with the specified `properties`.
133+
///
134+
/// This function can return an error in the following situations:
135+
///
136+
/// - If `hive.metastore.database.owner-type` is specified without
137+
/// `hive.metastore.database.owner`,
138+
/// - Errors from `validate_namespace` if the namespace identifier does not
139+
/// meet validation criteria.
140+
/// - Errors from `convert_to_database` if the properties cannot be
141+
/// successfully converted into a database configuration.
142+
/// - Errors from the underlying database creation process, converted using
143+
/// `from_thrift_error`.
128144
async fn create_namespace(
129145
&self,
130-
_namespace: &NamespaceIdent,
131-
_properties: HashMap<String, String>,
146+
namespace: &NamespaceIdent,
147+
properties: HashMap<String, String>,
132148
) -> Result<Namespace> {
133-
todo!()
149+
let database = convert_to_database(namespace, &properties)?;
150+
151+
self.client
152+
.0
153+
.create_database(database)
154+
.await
155+
.map_err(from_thrift_error)?;
156+
157+
Ok(Namespace::new(namespace.clone()))
134158
}
135159

136-
async fn get_namespace(&self, _namespace: &NamespaceIdent) -> Result<Namespace> {
137-
todo!()
160+
/// Retrieves a namespace by its identifier.
161+
///
162+
/// Validates the given namespace identifier and then queries the
163+
/// underlying database client to fetch the corresponding namespace data.
164+
/// Constructs a `Namespace` object with the retrieved data and returns it.
165+
///
166+
/// This function can return an error in any of the following situations:
167+
/// - If the provided namespace identifier fails validation checks
168+
/// - If there is an error querying the database, returned by
169+
/// `from_thrift_error`.
170+
async fn get_namespace(&self, namespace: &NamespaceIdent) -> Result<Namespace> {
171+
let name = validate_namespace(namespace)?;
172+
173+
let db = self
174+
.client
175+
.0
176+
.get_database(name.clone().into())
177+
.await
178+
.map_err(from_thrift_error)?;
179+
180+
let ns = convert_to_namespace(&db)?;
181+
182+
Ok(ns)
138183
}
139184

140-
async fn namespace_exists(&self, _namespace: &NamespaceIdent) -> Result<bool> {
141-
todo!()
185+
/// Checks if a namespace exists within the Hive Metastore.
186+
///
187+
/// Validates the namespace identifier by querying the Hive Metastore
188+
/// to determine if the specified namespace (database) exists.
189+
///
190+
/// # Returns
191+
/// A `Result<bool>` indicating the outcome of the check:
192+
/// - `Ok(true)` if the namespace exists.
193+
/// - `Ok(false)` if the namespace does not exist, identified by a specific
194+
/// `UserException` variant.
195+
/// - `Err(...)` if an error occurs during validation or the Hive Metastore
196+
/// query, with the error encapsulating the issue.
197+
async fn namespace_exists(&self, namespace: &NamespaceIdent) -> Result<bool> {
198+
let name = validate_namespace(namespace)?;
199+
200+
let resp = self.client.0.get_database(name.clone().into()).await;
201+
202+
match resp {
203+
Ok(_) => Ok(true),
204+
Err(err) => {
205+
if let ResponseError::UserException(ThriftHiveMetastoreGetDatabaseException::O1(
206+
_,
207+
)) = &err
208+
{
209+
Ok(false)
210+
} else {
211+
Err(from_thrift_error(err))
212+
}
213+
}
214+
}
142215
}
143216

217+
/// Asynchronously updates properties of an existing namespace.
218+
///
219+
/// Converts the given namespace identifier and properties into a database
220+
/// representation and then attempts to update the corresponding namespace
221+
/// in the Hive Metastore.
222+
///
223+
/// # Returns
224+
/// Returns `Ok(())` if the namespace update is successful. If the
225+
/// namespace cannot be updated due to missing information or an error
226+
/// during the update process, an `Err(...)` is returned.
144227
async fn update_namespace(
145228
&self,
146-
_namespace: &NamespaceIdent,
147-
_properties: HashMap<String, String>,
229+
namespace: &NamespaceIdent,
230+
properties: HashMap<String, String>,
148231
) -> Result<()> {
149-
todo!()
232+
let db = convert_to_database(namespace, &properties)?;
233+
234+
let name = match &db.name {
235+
Some(name) => name,
236+
None => {
237+
return Err(Error::new(
238+
ErrorKind::DataInvalid,
239+
"Database name must be specified",
240+
))
241+
}
242+
};
243+
244+
self.client
245+
.0
246+
.alter_database(name.clone(), db)
247+
.await
248+
.map_err(from_thrift_error)?;
249+
250+
Ok(())
150251
}
151252

152-
async fn drop_namespace(&self, _namespace: &NamespaceIdent) -> Result<()> {
153-
todo!()
253+
/// Asynchronously drops a namespace from the Hive Metastore.
254+
///
255+
/// # Returns
256+
/// A `Result<()>` indicating the outcome:
257+
/// - `Ok(())` signifies successful namespace deletion.
258+
/// - `Err(...)` signifies failure to drop the namespace due to validation
259+
/// errors, connectivity issues, or Hive Metastore constraints.
260+
async fn drop_namespace(&self, namespace: &NamespaceIdent) -> Result<()> {
261+
let name = validate_namespace(namespace)?;
262+
263+
self.client
264+
.0
265+
.drop_database(name.into(), false, false)
266+
.await
267+
.map_err(from_thrift_error)?;
268+
269+
Ok(())
154270
}
155271

156-
async fn list_tables(&self, _namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
157-
todo!()
272+
async fn list_tables(&self, namespace: &NamespaceIdent) -> Result<Vec<TableIdent>> {
273+
let name = validate_namespace(namespace)?;
274+
275+
let tables = self
276+
.client
277+
.0
278+
.get_all_tables(name.clone().into())
279+
.await
280+
.map_err(from_thrift_error)?;
281+
282+
let tables = tables
283+
.iter()
284+
.map(|table| TableIdent::new(namespace.clone(), table.to_string()))
285+
.collect();
286+
287+
Ok(tables)
158288
}
159289

160290
async fn create_table(

0 commit comments

Comments
 (0)