From ed71e6e6e846d4969798d4ef75911e2d5db21259 Mon Sep 17 00:00:00 2001 From: senmiaoliu Date: Tue, 10 Oct 2023 19:41:43 +0800 Subject: [PATCH 1/4] spark engine adaptive pool size --- docs/configuration/settings.md | 520 ++++++++---------- .../kyuubi/engine/spark/SparkSQLEngine.scala | 53 +- .../org/apache/kyuubi/config/KyuubiConf.scala | 19 +- .../ha/client/EngineServiceDiscovery.scala | 5 + .../ZookeeperDiscoveryClientSuite.scala | 1 - .../org/apache/kyuubi/engine/EngineRef.scala | 49 ++ 6 files changed, 366 insertions(+), 281 deletions(-) diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index c1e418d6249..44bc3623142 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -33,7 +33,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | Key | Default | Meaning | Type | Since | |-----------------------------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------|-------| -| kyuubi.authentication | NONE | A comma-separated list of client authentication types.The following tree 
describes the catalog of each option. Note that: for SASL authentication, KERBEROS and PLAIN auth types are supported at the same time, and only the first specified PLAIN auth type is valid. | seq | 1.0.0 | +| kyuubi.authentication | NONE | A comma-separated list of client authentication types.The following tree describes the catalog of each option. Note that: for SASL authentication, KERBEROS and PLAIN auth types are supported at the same time, and only the first specified PLAIN auth type is valid. | set | 1.0.0 | | kyuubi.authentication.custom.class | <undefined> | User-defined authentication implementation of org.apache.kyuubi.service.authentication.PasswdAuthenticationProvider | string | 1.3.0 | | kyuubi.authentication.jdbc.driver.class | <undefined> | Driver class name for JDBC Authentication Provider. | string | 1.6.0 | | kyuubi.authentication.jdbc.password | <undefined> | Database password for JDBC Authentication Provider. | string | 1.6.0 | @@ -120,108 +120,92 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Engine -| Key | Default | Meaning | Type | Since | 
-|----------------------------------------------------------|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------| -| kyuubi.engine.chat.ernie.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the ernie bot server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | -| kyuubi.engine.chat.ernie.http.proxy | <undefined> | HTTP proxy url for API calling in ernie bot engine. e.g. http://127.0.0.1:1088 | string | 1.9.0 | -| kyuubi.engine.chat.ernie.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after ernie bot server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | -| kyuubi.engine.chat.ernie.model | completions | ID of the model used in ernie bot. Available models are completions_pro, ernie_bot_8k, completions and eb-instant[Model overview](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/6lp69is2a). 
| string | 1.9.0 | -| kyuubi.engine.chat.ernie.token | <undefined> | The token to access ernie bot open API, which could be got at https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Ilkkrb0i5 | string | 1.9.0 | -| kyuubi.engine.chat.extra.classpath | <undefined> | The extra classpath for the Chat engine, for configuring the location of the SDK and etc. | string | 1.8.0 | -| kyuubi.engine.chat.gpt.apiKey | <undefined> | The key to access OpenAI open API, which could be got at https://platform.openai.com/account/api-keys | string | 1.8.0 | -| kyuubi.engine.chat.gpt.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the Chat GPT server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | -| kyuubi.engine.chat.gpt.http.proxy | <undefined> | HTTP proxy url for API calling in Chat GPT engine. e.g. http://127.0.0.1:1087 | string | 1.8.0 | -| kyuubi.engine.chat.gpt.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after Chat GPT server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | -| kyuubi.engine.chat.gpt.model | gpt-3.5-turbo | ID of the model used in ChatGPT. Available models refer to OpenAI's [Model overview](https://platform.openai.com/docs/models/overview). | string | 1.8.0 | -| kyuubi.engine.chat.java.options | <undefined> | The extra Java options for the Chat engine | string | 1.8.0 | -| kyuubi.engine.chat.memory | 1g | The heap memory for the Chat engine | string | 1.8.0 | -| kyuubi.engine.chat.provider | ECHO | The provider for the Chat engine. Candidates: | string | 1.8.0 | -| kyuubi.engine.connection.url.use.hostname | true | (deprecated) When true, the engine registers with hostname to zookeeper. When Spark runs on K8s with cluster mode, set to false to ensure that server can connect to engine | boolean | 1.3.0 | -| kyuubi.engine.deregister.exception.classes || A comma-separated list of exception classes. 
If there is any exception thrown, whose class matches the specified classes, the engine would deregister itself. | set | 1.2.0 | -| kyuubi.engine.deregister.exception.messages || A comma-separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself. | set | 1.2.0 | -| kyuubi.engine.deregister.exception.ttl | PT30M | Time to live(TTL) for exceptions pattern specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits the kyuubi.engine.deregister.job.max.failures within the TTL, an engine will deregister itself and wait for self-terminated. Otherwise, we suppose that the engine has recovered from temporary failures. | duration | 1.2.0 | -| kyuubi.engine.deregister.job.max.failures | 4 | Number of failures of job before deregistering the engine. | int | 1.2.0 | -| kyuubi.engine.doAs.enabled | true | Whether to enable user impersonation on launching engine. When enabled, for engines which supports user impersonation, e.g. SPARK, depends on the `kyuubi.engine.share.level`, different users will be used to launch the engine. Otherwise, Kyuubi Server's user will always be used to launch the engine. | boolean | 1.9.0 | -| kyuubi.engine.event.json.log.path | file:///tmp/kyuubi/events | The location where all the engine events go for the built-in JSON logger. | string | 1.3.0 | -| kyuubi.engine.event.loggers | SPARK | A comma-separated list of engine history loggers, where engine/session/operation etc events go. Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, the user needs to implement a subclass of `org.apache.kyuubi.events.handler.CustomEventHandlerProvider` which has a zero-arg constructor. 
| seq | 1.3.0 | -| kyuubi.engine.flink.application.jars | <undefined> | A comma-separated list of the local jars to be shipped with the job to the cluster. For example, SQL UDF jars. Only effective in yarn application mode. | string | 1.8.0 | -| kyuubi.engine.flink.extra.classpath | <undefined> | The extra classpath for the Flink SQL engine, for configuring the location of hadoop client jars, etc. Only effective in yarn session mode. | string | 1.6.0 | -| kyuubi.engine.flink.initialize.sql | SHOW DATABASES | The initialize sql for Flink engine. It fallback to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | -| kyuubi.engine.flink.java.options | <undefined> | The extra Java options for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | -| kyuubi.engine.flink.memory | 1g | The heap memory for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | -| kyuubi.engine.hive.deploy.mode | LOCAL | Configures the hive engine deploy mode, The value can be 'local', 'yarn'. In local mode, the engine operates on the same node as the KyuubiServer. In YARN mode, the engine runs within the Application Master (AM) container of YARN. | string | 1.9.0 | -| kyuubi.engine.hive.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go. | seq | 1.7.0 | -| kyuubi.engine.hive.extra.classpath | <undefined> | The extra classpath for the Hive query engine, for configuring location of the hadoop client jars and etc. | string | 1.6.0 | -| kyuubi.engine.hive.java.options | <undefined> | The extra Java options for the Hive query engine | string | 1.6.0 | -| kyuubi.engine.hive.memory | 1g | The heap memory for the Hive query engine | string | 1.6.0 | -| kyuubi.engine.initialize.sql | SHOW DATABASES | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SHOW DATABASES` to eagerly active HiveClient. 
This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.2.0 | -| kyuubi.engine.jdbc.connection.password | <undefined> | The password is used for connecting to server | string | 1.6.0 | -| kyuubi.engine.jdbc.connection.propagateCredential | false | Whether to use the session's user and password to connect to database | boolean | 1.8.0 | -| kyuubi.engine.jdbc.connection.properties || The additional properties are used for connecting to server | seq | 1.6.0 | -| kyuubi.engine.jdbc.connection.provider | <undefined> | A JDBC connection provider plugin for the Kyuubi Server to establish a connection to the JDBC URL. The configuration value should be a subclass of `org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider`. Kyuubi provides the following built-in implementations:
  • doris: For establishing Doris connections.
  • mysql: For establishing MySQL connections.
  • phoenix: For establishing Phoenix connections.
  • postgresql: For establishing PostgreSQL connections.
  • starrocks: For establishing StarRocks connections.
  • impala: For establishing Impala connections.
  • clickhouse: For establishing clickhouse connections.
  • | string | 1.6.0 | -| kyuubi.engine.jdbc.connection.url | <undefined> | The server url that engine will connect to | string | 1.6.0 | -| kyuubi.engine.jdbc.connection.user | <undefined> | The user is used for connecting to server | string | 1.6.0 | -| kyuubi.engine.jdbc.deploy.mode | LOCAL | Configures the jdbc engine deploy mode, The value can be 'local', 'yarn'. In local mode, the engine operates on the same node as the KyuubiServer. In YARN mode, the engine runs within the Application Master (AM) container of YARN. | string | 1.10.0 | -| kyuubi.engine.jdbc.driver.class | <undefined> | The driver class for JDBC engine connection | string | 1.6.0 | -| kyuubi.engine.jdbc.extra.classpath | <undefined> | The extra classpath for the JDBC query engine, for configuring the location of the JDBC driver and etc. | string | 1.6.0 | -| kyuubi.engine.jdbc.fetch.size | 1000 | The fetch size of JDBC engine | int | 1.9.0 | -| kyuubi.engine.jdbc.initialize.sql | SELECT 1 | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SELECT 1` to eagerly active JDBCClient. | seq | 1.8.0 | -| kyuubi.engine.jdbc.java.options | <undefined> | The extra Java options for the JDBC query engine | string | 1.6.0 | -| kyuubi.engine.jdbc.memory | 1g | The heap memory for the JDBC query engine | string | 1.6.0 | -| kyuubi.engine.jdbc.operation.incremental.collect | false | When true, the result will be sequentially calculated and returned to the JDBC engine. It fallback to `kyuubi.operation.incremental.collect` | boolean | 1.10.0 | -| kyuubi.engine.jdbc.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. | seq | 1.8.0 | -| kyuubi.engine.jdbc.type | <undefined> | The short name of JDBC type | string | 1.6.0 | -| kyuubi.engine.keytab | <undefined> | Kerberos keytab for the kyuubi engine. 
| string | 1.10.0 | -| kyuubi.engine.kubernetes.submit.timeout | PT30S | The engine submit timeout for Kubernetes application. | duration | 1.7.2 | -| kyuubi.engine.operation.convert.catalog.database.enabled | true | When set to true, The engine converts the JDBC methods of set/get Catalog and set/get Schema to the implementation of different engines | boolean | 1.6.0 | -| kyuubi.engine.operation.log.dir.root | engine_operation_logs | Root directory for query operation log at engine-side. | string | 1.4.0 | -| kyuubi.engine.pool.name | engine-pool | The name of the engine pool. | string | 1.5.0 | -| kyuubi.engine.pool.selectPolicy | RANDOM | The select policy of an engine from the corresponding engine pool engine for a session. | string | 1.7.0 | -| kyuubi.engine.pool.size | -1 | The size of the engine pool. Note that, if the size is less than 1, the engine pool will not be enabled; otherwise, the size of the engine pool will be min(this, kyuubi.engine.pool.size.threshold). | int | 1.4.0 | -| kyuubi.engine.pool.size.threshold | 9 | This parameter is introduced as a server-side parameter controlling the upper limit of the engine pool. | int | 1.4.0 | -| kyuubi.engine.principal | <undefined> | Kerberos principal for the kyuubi engine. | string | 1.10.0 | -| kyuubi.engine.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.3.0 | -| kyuubi.engine.share.level | USER | Engines will be shared in different levels, available configs are: See also `kyuubi.engine.share.level.subdomain` and `kyuubi.engine.doAs.enabled`. 
| string | 1.2.0 | -| kyuubi.engine.share.level.sub.domain | <undefined> | (deprecated) - Using kyuubi.engine.share.level.subdomain instead | string | 1.2.0 | -| kyuubi.engine.share.level.subdomain | <undefined> | Allow end-users to create a subdomain for the share level of an engine. A subdomain is a case-insensitive string values that must be a valid zookeeper subpath. For example, for the `USER` share level, an end-user can share a certain engine within a subdomain, not for all of its clients. End-users are free to create multiple engines in the `USER` share level. When disable engine pool, use 'default' if absent. | string | 1.4.0 | -| kyuubi.engine.single.spark.session | false | When set to true, this engine is running in a single session mode. All the JDBC/ODBC connections share the temporary views, function registries, SQL configuration and the current database. | boolean | 1.3.0 | -| kyuubi.engine.spark.event.loggers | SPARK | A comma-separated list of engine loggers, where engine/session/operation etc events go. | seq | 1.7.0 | -| kyuubi.engine.spark.initialize.sql | SHOW DATABASES | The initialize sql for Spark engine. It fallback to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | -| kyuubi.engine.spark.operation.incremental.collect | false | When true, the result will be sequentially calculated and returned to the Spark driver. Note that, kyuubi.operation.result.max.rows will be ignored on incremental collect mode. It fallback to `kyuubi.operation.incremental.collect` | boolean | 1.10.0 | -| kyuubi.engine.spark.output.mode | AUTO | The output mode of Spark engine: | string | 1.9.0 | -| kyuubi.engine.spark.python.env.archive | <undefined> | Portable Python env archive used for Spark engine Python language mode. | string | 1.7.0 | -| kyuubi.engine.spark.python.env.archive.exec.path | bin/python | The Python exec path under the Python env archive. 
| string | 1.7.0 | -| kyuubi.engine.spark.python.home.archive | <undefined> | Spark archive containing $SPARK_HOME/python directory, which is used to init session Python worker for Python language mode. | string | 1.7.0 | -| kyuubi.engine.submit.timeout | PT30S | Period to tolerant Driver Pod ephemerally invisible after submitting. In some Resource Managers, e.g. K8s, the Driver Pod is not visible immediately after `spark-submit` is returned. | duration | 1.7.1 | -| kyuubi.engine.trino.connection.keystore.password | <undefined> | The keystore password used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.keystore.path | <undefined> | The keystore path used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.keystore.type | <undefined> | The keystore type used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.password | <undefined> | The password used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.truststore.password | <undefined> | The truststore password used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.truststore.path | <undefined> | The truststore path used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.truststore.type | <undefined> | The truststore type used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.user | <undefined> | The user used for connecting to trino cluster | string | 1.9.0 | -| kyuubi.engine.trino.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go. 
| seq | 1.7.0 | -| kyuubi.engine.trino.extra.classpath | <undefined> | The extra classpath for the Trino query engine, for configuring other libs which may need by the Trino engine | string | 1.6.0 | -| kyuubi.engine.trino.java.options | <undefined> | The extra Java options for the Trino query engine | string | 1.6.0 | -| kyuubi.engine.trino.memory | 1g | The heap memory for the Trino query engine | string | 1.6.0 | -| kyuubi.engine.trino.operation.incremental.collect | false | When true, the result will be sequentially calculated and returned to the trino. It fallback to `kyuubi.operation.incremental.collect` | boolean | 1.10.0 | -| kyuubi.engine.type | SPARK_SQL | Specify the detailed engine supported by Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are: | string | 1.4.0 | -| kyuubi.engine.ui.retainedSessions | 200 | The number of SQL client sessions kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | -| kyuubi.engine.ui.retainedStatements | 200 | The number of statements kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | -| kyuubi.engine.ui.stop.enabled | true | When true, allows Kyuubi engine to be killed from the Spark Web UI. | boolean | 1.3.0 | -| kyuubi.engine.user.isolated.spark.session | true | When set to false, if the engine is running in a group or server share level, all the JDBC/ODBC connections will be isolated against the user. Including the temporary views, function registries, SQL configuration, and the current database. Note that, it does not affect if the share level is connection or user. | boolean | 1.6.0 | -| kyuubi.engine.user.isolated.spark.session.idle.interval | PT1M | The interval to check if the user-isolated Spark session is timeout. 
| duration | 1.6.0 | -| kyuubi.engine.user.isolated.spark.session.idle.timeout | PT6H | If kyuubi.engine.user.isolated.spark.session is false, we will release the Spark session if its corresponding user is inactive after this configured timeout. | duration | 1.6.0 | -| kyuubi.engine.yarn.app.name | <undefined> | The YARN app name when the engine deploy mode is YARN. | string | 1.9.0 | -| kyuubi.engine.yarn.cores | 1 | kyuubi engine container core number when the engine deploy mode is YARN. | int | 1.9.0 | -| kyuubi.engine.yarn.java.options | <undefined> | The extra Java options for the AM when the engine deploy mode is YARN. | string | 1.9.0 | -| kyuubi.engine.yarn.memory | 1024 | kyuubi engine container memory in mb when the engine deploy mode is YARN. | int | 1.9.0 | -| kyuubi.engine.yarn.priority | <undefined> | kyuubi engine yarn priority when the engine deploy mode is YARN. | int | 1.9.0 | -| kyuubi.engine.yarn.queue | default | kyuubi engine yarn queue when the engine deploy mode is YARN. | string | 1.9.0 | -| kyuubi.engine.yarn.stagingDir | <undefined> | Staging directory used while submitting kyuubi engine to YARN, It should be a absolute path in HDFS. | string | 1.9.0 | -| kyuubi.engine.yarn.submit.timeout | PT30S | The engine submit timeout for YARN application. | duration | 1.7.2 | -| kyuubi.engine.yarn.tags | <undefined> | kyuubi engine yarn tags when the engine deploy mode is YARN. 
| seq | 1.9.0 | +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------------|---------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.engine.chat.ernie.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the ernie bot server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | +| kyuubi.engine.chat.ernie.http.proxy | <undefined> | HTTP proxy url for API calling in ernie bot engine. e.g. http://127.0.0.1:1088 | string | 1.9.0 | +| kyuubi.engine.chat.ernie.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after ernie bot server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | +| kyuubi.engine.chat.ernie.model | completions | ID of the model used in ernie bot. Available models are completions_pro, ernie_bot_8k, completions and eb-instant[Model overview](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/6lp69is2a). 
| string | 1.9.0 | +| kyuubi.engine.chat.ernie.token | <undefined> | The token to access ernie bot open API, which could be got at https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Ilkkrb0i5 | string | 1.9.0 | +| kyuubi.engine.chat.extra.classpath | <undefined> | The extra classpath for the Chat engine, for configuring the location of the SDK and etc. | string | 1.8.0 | +| kyuubi.engine.chat.gpt.apiKey | <undefined> | The key to access OpenAI open API, which could be got at https://platform.openai.com/account/api-keys | string | 1.8.0 | +| kyuubi.engine.chat.gpt.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the Chat GPT server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | +| kyuubi.engine.chat.gpt.http.proxy | <undefined> | HTTP proxy url for API calling in Chat GPT engine. e.g. http://127.0.0.1:1087 | string | 1.8.0 | +| kyuubi.engine.chat.gpt.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after Chat GPT server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | +| kyuubi.engine.chat.gpt.model | gpt-3.5-turbo | ID of the model used in ChatGPT. Available models refer to OpenAI's [Model overview](https://platform.openai.com/docs/models/overview). | string | 1.8.0 | +| kyuubi.engine.chat.java.options | <undefined> | The extra Java options for the Chat engine | string | 1.8.0 | +| kyuubi.engine.chat.memory | 1g | The heap memory for the Chat engine | string | 1.8.0 | +| kyuubi.engine.chat.provider | ECHO | The provider for the Chat engine. Candidates: | string | 1.8.0 | +| kyuubi.engine.connection.url.use.hostname | true | (deprecated) When true, the engine registers with hostname to zookeeper. When Spark runs on K8s with cluster mode, set to false to ensure that server can connect to engine | boolean | 1.3.0 | +| kyuubi.engine.deregister.exception.classes || A comma-separated list of exception classes. 
If there is any exception thrown, whose class matches the specified classes, the engine would deregister itself. | set | 1.2.0 | +| kyuubi.engine.deregister.exception.messages || A comma-separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself. | set | 1.2.0 | +| kyuubi.engine.deregister.exception.ttl | PT30M | Time to live(TTL) for exceptions pattern specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits the kyuubi.engine.deregister.job.max.failures within the TTL, an engine will deregister itself and wait for self-terminated. Otherwise, we suppose that the engine has recovered from temporary failures. | duration | 1.2.0 | +| kyuubi.engine.deregister.job.max.failures | 4 | Number of failures of job before deregistering the engine. | int | 1.2.0 | +| kyuubi.engine.event.json.log.path | file:///tmp/kyuubi/events | The location where all the engine events go for the built-in JSON logger. | string | 1.3.0 | +| kyuubi.engine.event.loggers | SPARK | A comma-separated list of engine history loggers, where engine/session/operation etc events go. Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, the user needs to implement a subclass of `org.apache.kyuubi.events.handler.CustomEventHandlerProvider` which has a zero-arg constructor. | seq | 1.3.0 | +| kyuubi.engine.flink.application.jars | <undefined> | A comma-separated list of the local jars to be shipped with the job to the cluster. For example, SQL UDF jars. Only effective in yarn application mode. | string | 1.8.0 | +| kyuubi.engine.flink.extra.classpath | <undefined> | The extra classpath for the Flink SQL engine, for configuring the location of hadoop client jars, etc. Only effective in yarn session mode. 
| string | 1.6.0 | +| kyuubi.engine.flink.initialize.sql | SHOW DATABASES | The initialize sql for Flink engine. It fallback to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | +| kyuubi.engine.flink.java.options | <undefined> | The extra Java options for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | +| kyuubi.engine.flink.memory | 1g | The heap memory for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | +| kyuubi.engine.hive.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go. | seq | 1.7.0 | +| kyuubi.engine.hive.extra.classpath | <undefined> | The extra classpath for the Hive query engine, for configuring location of the hadoop client jars and etc. | string | 1.6.0 | +| kyuubi.engine.hive.java.options | <undefined> | The extra Java options for the Hive query engine | string | 1.6.0 | +| kyuubi.engine.hive.memory | 1g | The heap memory for the Hive query engine | string | 1.6.0 | +| kyuubi.engine.initialize.sql | SHOW DATABASES | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SHOW DATABASES` to eagerly active HiveClient. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.2.0 | +| kyuubi.engine.jdbc.connection.password | <undefined> | The password is used for connecting to server | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.propagateCredential | false | Whether to use the session's user and password to connect to database | boolean | 1.8.0 | +| kyuubi.engine.jdbc.connection.properties || The additional properties are used for connecting to server | seq | 1.6.0 | +| kyuubi.engine.jdbc.connection.provider | <undefined> | A JDBC connection provider plugin for the Kyuubi Server to establish a connection to the JDBC URL. 
The configuration value should be a subclass of `org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider`. Kyuubi provides the following built-in implementations:
  • doris: For establishing Doris connections.
  • mysql: For establishing MySQL connections.
  • phoenix: For establishing Phoenix connections.
  • postgresql: For establishing PostgreSQL connections.
  • | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.url | <undefined> | The server url that engine will connect to | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.user | <undefined> | The user is used for connecting to server | string | 1.6.0 | +| kyuubi.engine.jdbc.driver.class | <undefined> | The driver class for JDBC engine connection | string | 1.6.0 | +| kyuubi.engine.jdbc.extra.classpath | <undefined> | The extra classpath for the JDBC query engine, for configuring the location of the JDBC driver and etc. | string | 1.6.0 | +| kyuubi.engine.jdbc.fetch.size | 1000 | The fetch size of JDBC engine | int | 1.9.0 | +| kyuubi.engine.jdbc.initialize.sql | SELECT 1 | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SELECT 1` to eagerly active JDBCClient. | seq | 1.8.0 | +| kyuubi.engine.jdbc.java.options | <undefined> | The extra Java options for the JDBC query engine | string | 1.6.0 | +| kyuubi.engine.jdbc.memory | 1g | The heap memory for the JDBC query engine | string | 1.6.0 | +| kyuubi.engine.jdbc.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. | seq | 1.8.0 | +| kyuubi.engine.jdbc.type | <undefined> | The short name of JDBC type | string | 1.6.0 | +| kyuubi.engine.kubernetes.submit.timeout | PT30S | The engine submit timeout for Kubernetes application. | duration | 1.7.2 | +| kyuubi.engine.operation.convert.catalog.database.enabled | true | When set to true, The engine converts the JDBC methods of set/get Catalog and set/get Schema to the implementation of different engines | boolean | 1.6.0 | +| kyuubi.engine.operation.log.dir.root | engine_operation_logs | Root directory for query operation log at engine-side. | string | 1.4.0 | +| kyuubi.engine.pool.adaptive.session.threshold | 10 | The threshold of an engine's open session count for the adaptive engine pool select policy. 
| int | 1.9.0 | +| kyuubi.engine.pool.name | engine-pool | The name of the engine pool. | string | 1.5.0 | +| kyuubi.engine.pool.selectPolicy | RANDOM | The select policy of an engine from the corresponding engine pool engine for a session. | string | 1.7.0 | +| kyuubi.engine.pool.size | -1 | The size of the engine pool. Note that, if the size is less than 1, the engine pool will not be enabled; otherwise, the size of the engine pool will be min(this, kyuubi.engine.pool.size.threshold). | int | 1.4.0 | +| kyuubi.engine.pool.size.threshold | 9 | This parameter is introduced as a server-side parameter controlling the upper limit of the engine pool. | int | 1.4.0 | +| kyuubi.engine.report.interval | PT1M | The check interval for engine report to the server | duration | 1.9.0 | +| kyuubi.engine.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.3.0 | +| kyuubi.engine.share.level | USER | Engines will be shared in different levels, available configs are: | string | 1.2.0 | +| kyuubi.engine.share.level.sub.domain | <undefined> | (deprecated) - Using kyuubi.engine.share.level.subdomain instead | string | 1.2.0 | +| kyuubi.engine.share.level.subdomain | <undefined> | Allow end-users to create a subdomain for the share level of an engine. A subdomain is a case-insensitive string values that must be a valid zookeeper subpath. For example, for the `USER` share level, an end-user can share a certain engine within a subdomain, not for all of its clients. End-users are free to create multiple engines in the `USER` share level. When disable engine pool, use 'default' if absent. | string | 1.4.0 | +| kyuubi.engine.single.spark.session | false | When set to true, this engine is running in a single session mode. 
All the JDBC/ODBC connections share the temporary views, function registries, SQL configuration and the current database. | boolean | 1.3.0 | +| kyuubi.engine.spark.event.loggers | SPARK | A comma-separated list of engine loggers, where engine/session/operation etc events go. | seq | 1.7.0 | +| kyuubi.engine.spark.initialize.sql | SHOW DATABASES | The initialize sql for Spark engine. It falls back to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | +| kyuubi.engine.spark.python.env.archive | <undefined> | Portable Python env archive used for Spark engine Python language mode. | string | 1.7.0 | +| kyuubi.engine.spark.python.env.archive.exec.path | bin/python | The Python exec path under the Python env archive. | string | 1.7.0 | +| kyuubi.engine.spark.python.home.archive | <undefined> | Spark archive containing $SPARK_HOME/python directory, which is used to init session Python worker for Python language mode. | string | 1.7.0 | +| kyuubi.engine.submit.timeout | PT30S | Period to tolerate the Driver Pod being ephemerally invisible after submitting. In some Resource Managers, e.g. K8s, the Driver Pod is not visible immediately after `spark-submit` is returned. 
| duration | 1.7.1 | +| kyuubi.engine.trino.connection.keystore.password | <undefined> | The keystore password used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.keystore.path | <undefined> | The keystore path used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.keystore.type | <undefined> | The keystore type used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.password | <undefined> | The password used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.truststore.password | <undefined> | The truststore password used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.truststore.path | <undefined> | The truststore path used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.truststore.type | <undefined> | The truststore type used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go. | seq | 1.7.0 | +| kyuubi.engine.trino.extra.classpath | <undefined> | The extra classpath for the Trino query engine, for configuring other libs which may need by the Trino engine | string | 1.6.0 | +| kyuubi.engine.trino.java.options | <undefined> | The extra Java options for the Trino query engine | string | 1.6.0 | +| kyuubi.engine.trino.memory | 1g | The heap memory for the Trino query engine | string | 1.6.0 | +| kyuubi.engine.type | SPARK_SQL | Specify the detailed engine supported by Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are: | string | 1.4.0 | +| kyuubi.engine.ui.retainedSessions | 200 | The number of SQL client sessions kept in the Kyuubi Query Engine web UI. 
| int | 1.4.0 | +| kyuubi.engine.ui.retainedStatements | 200 | The number of statements kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | +| kyuubi.engine.ui.stop.enabled | true | When true, allows Kyuubi engine to be killed from the Spark Web UI. | boolean | 1.3.0 | +| kyuubi.engine.user.isolated.spark.session | true | When set to false, if the engine is running in a group or server share level, all the JDBC/ODBC connections will be isolated against the user. Including the temporary views, function registries, SQL configuration, and the current database. Note that, it does not affect if the share level is connection or user. | boolean | 1.6.0 | +| kyuubi.engine.user.isolated.spark.session.idle.interval | PT1M | The interval to check if the user-isolated Spark session is timeout. | duration | 1.6.0 | +| kyuubi.engine.user.isolated.spark.session.idle.timeout | PT6H | If kyuubi.engine.user.isolated.spark.session is false, we will release the Spark session if its corresponding user is inactive after this configured timeout. | duration | 1.6.0 | +| kyuubi.engine.yarn.submit.timeout | PT30S | The engine submit timeout for YARN application. 
| duration | 1.7.2 | ### Event @@ -233,70 +217,66 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Frontend -| Key | Default | Meaning | Type | Since | -|------------------------------------------------------------|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------| -| kyuubi.frontend.advertised.host | <undefined> | Hostname or IP of the Kyuubi server's frontend services to publish to external systems such as the service discovery ensemble and metadata store. Use it when you want to advertise a different hostname or IP than the bind host. | string | 1.8.0 | -| kyuubi.frontend.bind.host | <undefined> | Hostname or IP of the machine on which to run the frontend services. | string | 1.0.0 | -| kyuubi.frontend.bind.port | 10009 | (deprecated) Port of the machine on which to run the thrift frontend service via the binary protocol. | int | 1.0.0 | -| kyuubi.frontend.connection.url.use.hostname | true | When true, frontend services prefer hostname, otherwise, ip address. Note that, the default value is set to `false` when engine running on Kubernetes to prevent potential network issues. | boolean | 1.5.0 | -| kyuubi.frontend.max.message.size | 104857600 | (deprecated) Maximum message size in bytes a Kyuubi server will accept. 
| int | 1.0.0 | -| kyuubi.frontend.max.worker.threads | 999 | (deprecated) Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | -| kyuubi.frontend.min.worker.threads | 9 | (deprecated) Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | -| kyuubi.frontend.mysql.bind.host | <undefined> | Hostname or IP of the machine on which to run the MySQL frontend service. | string | 1.4.0 | -| kyuubi.frontend.mysql.bind.port | 3309 | Port of the machine on which to run the MySQL frontend service. | int | 1.4.0 | -| kyuubi.frontend.mysql.max.worker.threads | 999 | Maximum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | -| kyuubi.frontend.mysql.min.worker.threads | 9 | Minimum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | -| kyuubi.frontend.mysql.netty.worker.threads | <undefined> | Number of thread in the netty worker event loop of MySQL frontend service. Use min(cpu_cores, 8) in default. | int | 1.4.0 | -| kyuubi.frontend.mysql.worker.keepalive.time | PT1M | Time(ms) that an idle async thread of the command execution thread pool will wait for a new task to arrive before terminating in MySQL frontend service | duration | 1.4.0 | -| kyuubi.frontend.protocols | THRIFT_BINARY,REST | A comma-separated list for all frontend protocols | seq | 1.4.0 | -| kyuubi.frontend.proxy.http.client.ip.header | X-Real-IP | The HTTP header to record the real client IP address. If your server is behind a load balancer or other proxy, the server will see this load balancer or proxy IP address as the client IP address, to get around this common issue, most load balancers or proxies offer the ability to record the real remote IP address in an HTTP header that will be added to the request for other devices to use. 
Note that, because the header value can be specified to any IP address, so it will not be used for authentication. | string | 1.6.0 | -| kyuubi.frontend.rest.bind.host | <undefined> | Hostname or IP of the machine on which to run the REST frontend service. | string | 1.4.0 | -| kyuubi.frontend.rest.bind.port | 10099 | Port of the machine on which to run the REST frontend service. | int | 1.4.0 | -| kyuubi.frontend.rest.jetty.stopTimeout | PT5S | Stop timeout for Jetty server used by the RESTful frontend service. | duration | 1.8.1 | -| kyuubi.frontend.rest.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the rest frontend service | int | 1.6.2 | -| kyuubi.frontend.rest.proxy.jetty.client.idleTimeout | PT30S | The idle timeout in milliseconds for Jetty server used by the RESTful frontend service. | duration | 1.10.0 | -| kyuubi.frontend.rest.proxy.jetty.client.maxConnections | 32768 | The max number of connections per destination for Jetty server used by the RESTful frontend service. | int | 1.10.0 | -| kyuubi.frontend.rest.proxy.jetty.client.maxThreads | 256 | The max number of threads of HttpClient's Executor for Jetty server used by the RESTful frontend service. | int | 1.10.0 | -| kyuubi.frontend.rest.proxy.jetty.client.requestBufferSize | 4096 | Size of the buffer in bytes used to write requests for Jetty server used by the RESTful frontend service. | int | 1.10.0 | -| kyuubi.frontend.rest.proxy.jetty.client.responseBufferSize | 4096 | Size of the buffer in bytes used to read response for Jetty server used by the RESTful frontend service. | int | 1.10.0 | -| kyuubi.frontend.rest.proxy.jetty.client.timeout | PT60S | The total timeout in milliseconds for Jetty server used by the RESTful frontend service. 
| duration | 1.10.0 | -| kyuubi.frontend.rest.ui.enabled | true | Whether to enable Web UI when RESTful protocol is enabled | boolean | 1.10.0 | -| kyuubi.frontend.ssl.keystore.algorithm | <undefined> | SSL certificate keystore algorithm. | string | 1.7.0 | -| kyuubi.frontend.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.7.0 | -| kyuubi.frontend.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.7.0 | -| kyuubi.frontend.ssl.keystore.type | <undefined> | SSL certificate keystore type. | string | 1.7.0 | -| kyuubi.frontend.thrift.binary.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via the binary protocol. | string | 1.4.0 | -| kyuubi.frontend.thrift.binary.bind.port | 10009 | Port of the machine on which to run the thrift frontend service via the binary protocol. | int | 1.4.0 | -| kyuubi.frontend.thrift.binary.ssl.disallowed.protocols | SSLv2,SSLv3 | SSL versions to disallow for Kyuubi thrift binary frontend. | set | 1.7.0 | -| kyuubi.frontend.thrift.binary.ssl.enabled | false | Set this to true for using SSL encryption in thrift binary frontend server. | boolean | 1.7.0 | -| kyuubi.frontend.thrift.binary.ssl.include.ciphersuites || A comma-separated list of include SSL cipher suite names for thrift binary frontend. | seq | 1.7.0 | -| kyuubi.frontend.thrift.http.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via http protocol. | string | 1.6.0 | -| kyuubi.frontend.thrift.http.bind.port | 10010 | Port of the machine on which to run the thrift frontend service via http protocol. 
| int | 1.6.0 | -| kyuubi.frontend.thrift.http.compression.enabled | true | Enable thrift http compression via Jetty compression support | boolean | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.auth.enabled | true | When true, Kyuubi in HTTP transport mode, will use cookie-based authentication mechanism | boolean | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.domain | <undefined> | Domain for the Kyuubi generated cookies | string | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.is.httponly | true | HttpOnly attribute of the Kyuubi generated cookie. | boolean | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.max.age | 86400 | Maximum age in seconds for server side cookie used by Kyuubi in HTTP mode. | int | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.path | <undefined> | Path for the Kyuubi generated cookies | string | 1.6.0 | -| kyuubi.frontend.thrift.http.max.idle.time | PT30M | Maximum idle time for a connection on the server when in HTTP mode. | duration | 1.6.0 | -| kyuubi.frontend.thrift.http.path | cliservice | Path component of URL endpoint when in HTTP mode. | string | 1.6.0 | -| kyuubi.frontend.thrift.http.request.header.size | 6144 | Request header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | -| kyuubi.frontend.thrift.http.response.header.size | 6144 | Response header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | -| kyuubi.frontend.thrift.http.ssl.exclude.ciphersuites || A comma-separated list of exclude SSL cipher suite names for thrift http frontend. | seq | 1.7.0 | -| kyuubi.frontend.thrift.http.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.6.0 | -| kyuubi.frontend.thrift.http.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.6.0 | -| kyuubi.frontend.thrift.http.ssl.protocol.blacklist | SSLv2,SSLv3 | SSL Versions to disable when using HTTP transport mode. 
| seq | 1.6.0 | -| kyuubi.frontend.thrift.http.use.SSL | false | Set this to true for using SSL encryption in http mode. | boolean | 1.6.0 | -| kyuubi.frontend.thrift.http.xsrf.filter.enabled | false | If enabled, Kyuubi will block any requests made to it over HTTP if an X-XSRF-HEADER header is not present | boolean | 1.6.0 | -| kyuubi.frontend.thrift.max.message.size | 104857600 | Maximum message size in bytes a Kyuubi server will accept. | int | 1.4.0 | -| kyuubi.frontend.thrift.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | -| kyuubi.frontend.thrift.min.worker.threads | 9 | Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | -| kyuubi.frontend.thrift.worker.keepalive.time | PT1M | Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.4.0 | -| kyuubi.frontend.trino.bind.host | <undefined> | Hostname or IP of the machine on which to run the TRINO frontend service. | string | 1.7.0 | -| kyuubi.frontend.trino.bind.port | 10999 | Port of the machine on which to run the TRINO frontend service. | int | 1.7.0 | -| kyuubi.frontend.trino.jetty.stopTimeout | PT5S | Stop timeout for Jetty server used by the Trino frontend service. 
| duration | 1.8.1 | -| kyuubi.frontend.trino.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the Trino frontend service | int | 1.7.0 | -| kyuubi.frontend.worker.keepalive.time | PT1M | (deprecated) Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.0.0 | +| Key | Default | Meaning | Type | Since | +|--------------------------------------------------------|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.frontend.advertised.host | <undefined> | Hostname or IP of the Kyuubi server's frontend services to publish to external systems such as the service discovery ensemble and metadata store. Use it when you want to advertise a different hostname or IP than the bind host. | string | 1.8.0 | +| kyuubi.frontend.backoff.slot.length | PT0.1S | (deprecated) Time to back off during login to the thrift frontend service. | duration | 1.0.0 | +| kyuubi.frontend.bind.host | <undefined> | Hostname or IP of the machine on which to run the frontend services. | string | 1.0.0 | +| kyuubi.frontend.bind.port | 10009 | (deprecated) Port of the machine on which to run the thrift frontend service via the binary protocol. | int | 1.0.0 | +| kyuubi.frontend.connection.url.use.hostname | true | When true, frontend services prefer hostname, otherwise, ip address. Note that, the default value is set to `false` when engine running on Kubernetes to prevent potential network issues. 
| boolean | 1.5.0 | +| kyuubi.frontend.login.timeout | PT20S | (deprecated) Timeout for Thrift clients during login to the thrift frontend service. | duration | 1.0.0 | +| kyuubi.frontend.max.message.size | 104857600 | (deprecated) Maximum message size in bytes a Kyuubi server will accept. | int | 1.0.0 | +| kyuubi.frontend.max.worker.threads | 999 | (deprecated) Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | +| kyuubi.frontend.min.worker.threads | 9 | (deprecated) Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | +| kyuubi.frontend.mysql.bind.host | <undefined> | Hostname or IP of the machine on which to run the MySQL frontend service. | string | 1.4.0 | +| kyuubi.frontend.mysql.bind.port | 3309 | Port of the machine on which to run the MySQL frontend service. | int | 1.4.0 | +| kyuubi.frontend.mysql.max.worker.threads | 999 | Maximum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | +| kyuubi.frontend.mysql.min.worker.threads | 9 | Minimum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | +| kyuubi.frontend.mysql.netty.worker.threads | <undefined> | Number of thread in the netty worker event loop of MySQL frontend service. Use min(cpu_cores, 8) in default. | int | 1.4.0 | +| kyuubi.frontend.mysql.worker.keepalive.time | PT1M | Time(ms) that an idle async thread of the command execution thread pool will wait for a new task to arrive before terminating in MySQL frontend service | duration | 1.4.0 | +| kyuubi.frontend.protocols | THRIFT_BINARY,REST | A comma-separated list for all frontend protocols | seq | 1.4.0 | +| kyuubi.frontend.proxy.http.client.ip.header | X-Real-IP | The HTTP header to record the real client IP address. 
If your server is behind a load balancer or other proxy, the server will see this load balancer or proxy IP address as the client IP address, to get around this common issue, most load balancers or proxies offer the ability to record the real remote IP address in an HTTP header that will be added to the request for other devices to use. Note that, because the header value can be specified to any IP address, so it will not be used for authentication. | string | 1.6.0 | +| kyuubi.frontend.rest.bind.host | <undefined> | Hostname or IP of the machine on which to run the REST frontend service. | string | 1.4.0 | +| kyuubi.frontend.rest.bind.port | 10099 | Port of the machine on which to run the REST frontend service. | int | 1.4.0 | +| kyuubi.frontend.rest.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the rest frontend service | int | 1.6.2 | +| kyuubi.frontend.ssl.keystore.algorithm | <undefined> | SSL certificate keystore algorithm. | string | 1.7.0 | +| kyuubi.frontend.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.7.0 | +| kyuubi.frontend.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.7.0 | +| kyuubi.frontend.ssl.keystore.type | <undefined> | SSL certificate keystore type. | string | 1.7.0 | +| kyuubi.frontend.thrift.backoff.slot.length | PT0.1S | Time to back off during login to the thrift frontend service. | duration | 1.4.0 | +| kyuubi.frontend.thrift.binary.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via the binary protocol. | string | 1.4.0 | +| kyuubi.frontend.thrift.binary.bind.port | 10009 | Port of the machine on which to run the thrift frontend service via the binary protocol. | int | 1.4.0 | +| kyuubi.frontend.thrift.binary.ssl.disallowed.protocols | SSLv2,SSLv3 | SSL versions to disallow for Kyuubi thrift binary frontend. 
| set | 1.7.0 | +| kyuubi.frontend.thrift.binary.ssl.enabled | false | Set this to true for using SSL encryption in thrift binary frontend server. | boolean | 1.7.0 | +| kyuubi.frontend.thrift.binary.ssl.include.ciphersuites || A comma-separated list of include SSL cipher suite names for thrift binary frontend. | seq | 1.7.0 | +| kyuubi.frontend.thrift.http.allow.user.substitution | true | Allow alternate user to be specified as part of open connection request when using HTTP transport mode. | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via http protocol. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.bind.port | 10010 | Port of the machine on which to run the thrift frontend service via http protocol. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.compression.enabled | true | Enable thrift http compression via Jetty compression support | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.auth.enabled | true | When true, Kyuubi in HTTP transport mode, will use cookie-based authentication mechanism | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.domain | <undefined> | Domain for the Kyuubi generated cookies | string | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.is.httponly | true | HttpOnly attribute of the Kyuubi generated cookie. | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.max.age | 86400 | Maximum age in seconds for server side cookie used by Kyuubi in HTTP mode. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.path | <undefined> | Path for the Kyuubi generated cookies | string | 1.6.0 | +| kyuubi.frontend.thrift.http.max.idle.time | PT30M | Maximum idle time for a connection on the server when in HTTP mode. | duration | 1.6.0 | +| kyuubi.frontend.thrift.http.path | cliservice | Path component of URL endpoint when in HTTP mode. 
| string | 1.6.0 | +| kyuubi.frontend.thrift.http.request.header.size | 6144 | Request header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.response.header.size | 6144 | Response header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.ssl.exclude.ciphersuites || A comma-separated list of exclude SSL cipher suite names for thrift http frontend. | seq | 1.7.0 | +| kyuubi.frontend.thrift.http.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.ssl.protocol.blacklist | SSLv2,SSLv3 | SSL Versions to disable when using HTTP transport mode. | seq | 1.6.0 | +| kyuubi.frontend.thrift.http.use.SSL | false | Set this to true for using SSL encryption in http mode. | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.xsrf.filter.enabled | false | If enabled, Kyuubi will block any requests made to it over HTTP if an X-XSRF-HEADER header is not present | boolean | 1.6.0 | +| kyuubi.frontend.thrift.login.timeout | PT20S | Timeout for Thrift clients during login to the thrift frontend service. | duration | 1.4.0 | +| kyuubi.frontend.thrift.max.message.size | 104857600 | Maximum message size in bytes a Kyuubi server will accept. 
| int | 1.4.0 | +| kyuubi.frontend.thrift.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | +| kyuubi.frontend.thrift.min.worker.threads | 9 | Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | +| kyuubi.frontend.thrift.worker.keepalive.time | PT1M | Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.4.0 | +| kyuubi.frontend.trino.bind.host | <undefined> | Hostname or IP of the machine on which to run the TRINO frontend service. | string | 1.7.0 | +| kyuubi.frontend.trino.bind.port | 10999 | Port of the machine on which to run the TRINO frontend service. | int | 1.7.0 | +| kyuubi.frontend.trino.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the Trino frontend service | int | 1.7.0 | +| kyuubi.frontend.worker.keepalive.time | PT1M | (deprecated) Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.0.0 | ### Ha @@ -310,7 +290,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.ha.etcd.ssl.client.key.path | <undefined> | Where the etcd SSL key file is stored. | string | 1.6.0 | | kyuubi.ha.etcd.ssl.enabled | false | When set to true, will build an SSL secured etcd client. | boolean | 1.6.0 | | kyuubi.ha.namespace | kyuubi | The root directory for the service to deploy its instance uri | string | 1.6.0 | -| kyuubi.ha.zookeeper.acl.enabled | false | (deprecated) Set to true if the ZooKeeper ensemble is kerberized | boolean | 1.0.0 | +| kyuubi.ha.zookeeper.acl.enabled | false | Set to true if the ZooKeeper ensemble is kerberized | boolean | 1.0.0 | | kyuubi.ha.zookeeper.auth.digest | <undefined> | The digest auth string is used for ZooKeeper authentication, like: username:password. 
| string | 1.3.2 | | kyuubi.ha.zookeeper.auth.keytab | <undefined> | Location of the Kyuubi server's keytab that is used for ZooKeeper authentication. | string | 1.3.2 | | kyuubi.ha.zookeeper.auth.principal | <undefined> | Kerberos principal name that is used for ZooKeeper authentication. | string | 1.3.2 | @@ -339,27 +319,26 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Kubernetes -| Key | Default | Meaning | Type | Since | -|----------------------------------------------------------------------|----------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------| -| kyuubi.kubernetes.application.state.container | spark-kubernetes-driver | The container name to retrieve the application state from. | string | 1.8.1 | -| kyuubi.kubernetes.application.state.source | POD | The source to retrieve the application state from. The valid values are pod and container. If the source is container and there is container inside the pod with the name of kyuubi.kubernetes.application.state.container, the application state will be from the matched container state. Otherwise, the application state will be from the pod state. | string | 1.8.1 | -| kyuubi.kubernetes.authenticate.caCertFile | <undefined> | Path to the CA cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.clientCertFile | <undefined> | Path to the client cert file for connecting to the Kubernetes API server over TLS from the kyuubi. 
Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.clientKeyFile | <undefined> | Path to the client key file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.oauthToken | <undefined> | The OAuth token to use when authenticating against the Kubernetes API server. Note that unlike, the other authentication options, this must be the exact string value of the token to use for the authentication. | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.oauthTokenFile | <undefined> | Path to the file containing the OAuth token to use when authenticating against the Kubernetes API server. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.context | <undefined> | The desired context from your kubernetes config file used to configure the K8s client for interacting with the cluster. | string | 1.6.0 | -| kyuubi.kubernetes.context.allow.list || The allowed kubernetes context list, if it is empty, there is no kubernetes context limitation. | set | 1.8.0 | -| kyuubi.kubernetes.master.address | <undefined> | The internal Kubernetes master (API server) address to be used for kyuubi. | string | 1.7.0 | -| kyuubi.kubernetes.namespace | default | The namespace that will be used for running the kyuubi pods and find engines. | string | 1.7.0 | -| kyuubi.kubernetes.namespace.allow.list || The allowed kubernetes namespace list, if it is empty, there is no kubernetes namespace limitation. | set | 1.8.0 | -| kyuubi.kubernetes.spark.appUrlPattern | http://{{SPARK_DRIVER_SVC}}.{{KUBERNETES_NAMESPACE}}.svc:{{SPARK_UI_PORT}} | The pattern to generate the spark on kubernetes application UI URL. The pattern should contain placeholders for the application variables. 
Available placeholders are `{{SPARK_APP_ID}}`, `{{SPARK_DRIVER_SVC}}`, `{{KUBERNETES_NAMESPACE}}`, `{{KUBERNETES_CONTEXT}}` and `{{SPARK_UI_PORT}}`. | string | 1.10.0 | -| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.checkInterval | PT1M | Kyuubi server use guava cache as the cleanup trigger with time-based eviction, but the eviction would not happened until any get/put operation happened. This option schedule a daemon thread evict cache periodically. | duration | 1.8.1 | -| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.kind | NONE | Kyuubi server will delete the spark driver pod after the application terminates for kyuubi.kubernetes.terminatedApplicationRetainPeriod. Available options are NONE, ALL, COMPLETED and default value is None which means none of the pod will be deleted | string | 1.8.1 | -| kyuubi.kubernetes.spark.forciblyRewriteDriverPodName.enabled | false | Whether to forcibly rewrite Spark driver pod name with 'kyuubi--driver'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | -| kyuubi.kubernetes.spark.forciblyRewriteExecutorPodNamePrefix.enabled | false | Whether to forcibly rewrite Spark executor pod name prefix with 'kyuubi-'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter Pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | -| kyuubi.kubernetes.terminatedApplicationRetainPeriod | PT5M | The period for which the Kyuubi server retains application information after the application terminates. 
| duration | 1.7.1 | -| kyuubi.kubernetes.trust.certificates | false | If set to true then client can submit to kubernetes cluster only with token | boolean | 1.7.0 | +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------------------------|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.kubernetes.application.state.container | spark-kubernetes-driver | The container name to retrieve the application state from. | string | 1.8.1 | +| kyuubi.kubernetes.application.state.source | POD | The source to retrieve the application state from. The valid values are pod and container. If the source is container and there is container inside the pod with the name of kyuubi.kubernetes.application.state.container, the application state will be from the matched container state. Otherwise, the application state will be from the pod state. | string | 1.8.1 | +| kyuubi.kubernetes.authenticate.caCertFile | <undefined> | Path to the CA cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.clientCertFile | <undefined> | Path to the client cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.clientKeyFile | <undefined> | Path to the client key file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. 
do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.oauthToken | <undefined> | The OAuth token to use when authenticating against the Kubernetes API server. Note that unlike, the other authentication options, this must be the exact string value of the token to use for the authentication. | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.oauthTokenFile | <undefined> | Path to the file containing the OAuth token to use when authenticating against the Kubernetes API server. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.context | <undefined> | The desired context from your kubernetes config file used to configure the K8s client for interacting with the cluster. | string | 1.6.0 | +| kyuubi.kubernetes.context.allow.list || The allowed kubernetes context list, if it is empty, there is no kubernetes context limitation. | set | 1.8.0 | +| kyuubi.kubernetes.master.address | <undefined> | The internal Kubernetes master (API server) address to be used for kyuubi. | string | 1.7.0 | +| kyuubi.kubernetes.namespace | default | The namespace that will be used for running the kyuubi pods and find engines. | string | 1.7.0 | +| kyuubi.kubernetes.namespace.allow.list || The allowed kubernetes namespace list, if it is empty, there is no kubernetes namespace limitation. | set | 1.8.0 | +| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.checkInterval | PT1M | Kyuubi server use guava cache as the cleanup trigger with time-based eviction, but the eviction would not happened until any get/put operation happened. This option schedule a daemon thread evict cache periodically. | duration | 1.8.1 | +| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.kind | NONE | Kyuubi server will delete the spark driver pod after the application terminates for kyuubi.kubernetes.terminatedApplicationRetainPeriod. 
Available options are NONE, ALL, COMPLETED and default value is None which means none of the pod will be deleted | string | 1.8.1 | +| kyuubi.kubernetes.spark.forciblyRewriteDriverPodName.enabled | false | Whether to forcibly rewrite Spark driver pod name with 'kyuubi--driver'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | +| kyuubi.kubernetes.spark.forciblyRewriteExecutorPodNamePrefix.enabled | false | Whether to forcibly rewrite Spark executor pod name prefix with 'kyuubi-'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter Pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | +| kyuubi.kubernetes.terminatedApplicationRetainPeriod | PT5M | The period for which the Kyuubi server retains application information after the application terminates. 
| duration | 1.7.1 | +| kyuubi.kubernetes.trust.certificates | false | If set to true then client can submit to kubernetes cluster only with token | boolean | 1.7.0 | ### Lineage @@ -369,24 +348,24 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Metadata -| Key | Default | Meaning | Type | Since | -|-------------------------------------------------|----------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.metadata.cleaner.enabled | true | Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in the terminate state with max age limitation. | boolean | 1.6.0 | -| kyuubi.metadata.cleaner.interval | PT30M | The interval to check and clean expired metadata. | duration | 1.6.0 | -| kyuubi.metadata.max.age | PT72H | The maximum age of metadata, the metadata exceeding the age will be cleaned. | duration | 1.6.0 | -| kyuubi.metadata.recovery.threads | 10 | The number of threads for recovery from the metadata store when the Kyuubi server restarts. | int | 1.6.0 | -| kyuubi.metadata.request.async.retry.enabled | true | Whether to retry in async when metadata request failed. 
When true, return success response immediately even the metadata request failed, and schedule it in background until success, to tolerate long-time metadata store outages w/o blocking the submission request. | boolean | 1.7.0 | -| kyuubi.metadata.request.async.retry.queue.size | 65536 | The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | -| kyuubi.metadata.request.async.retry.threads | 10 | Number of threads in the metadata request async retry manager thread pool. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | -| kyuubi.metadata.request.retry.interval | PT5S | The interval to check and trigger the metadata request retry tasks. | duration | 1.6.0 | -| kyuubi.metadata.store.class | org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore | Fully qualified class name for server metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.database.schema.init | true | Whether to init the JDBC metadata store database schema. | boolean | 1.6.0 | -| kyuubi.metadata.store.jdbc.database.type | SQLITE | The database type for server jdbc metadata store.
    • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
    • MYSQL: MySQL, JDBC driver `com.mysql.cj.jdbc.Driver` (fallback `com.mysql.jdbc.Driver`).
    • POSTGRESQL: PostgreSQL, JDBC driver `org.postgresql.Driver`.
    • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
    • Note that: The JDBC datasource is powered by HiKariCP, for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | -| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:<KYUUBI_HOME>/kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across Kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. Note: this value support the variables substitution: ``. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. 
| string | 1.6.0 | +| Key | Default | Meaning | Type | Since | +|-------------------------------------------------|----------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.metadata.cleaner.enabled | true | Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in the terminate state with max age limitation. | boolean | 1.6.0 | +| kyuubi.metadata.cleaner.interval | PT30M | The interval to check and clean expired metadata. | duration | 1.6.0 | +| kyuubi.metadata.max.age | PT72H | The maximum age of metadata, the metadata exceeding the age will be cleaned. | duration | 1.6.0 | +| kyuubi.metadata.recovery.threads | 10 | The number of threads for recovery from the metadata store when the Kyuubi server restarts. | int | 1.6.0 | +| kyuubi.metadata.request.async.retry.enabled | true | Whether to retry in async when metadata request failed. When true, return success response immediately even the metadata request failed, and schedule it in background until success, to tolerate long-time metadata store outages w/o blocking the submission request. | boolean | 1.7.0 | +| kyuubi.metadata.request.async.retry.queue.size | 65536 | The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds. 
Only takes effect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | +| kyuubi.metadata.request.async.retry.threads | 10 | Number of threads in the metadata request async retry manager thread pool. Only takes effect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | +| kyuubi.metadata.request.retry.interval | PT5S | The interval to check and trigger the metadata request retry tasks. | duration | 1.6.0 | +| kyuubi.metadata.store.class | org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore | Fully qualified class name for server metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.schema.init | true | Whether to init the JDBC metadata store database schema. | boolean | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.type | SQLITE | The database type for server jdbc metadata store.
      • (Deprecated) DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
      • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
      • MYSQL: MySQL, JDBC driver `com.mysql.cj.jdbc.Driver` (fallback `com.mysql.jdbc.Driver`).
      • CUSTOM: User-defined database type; the corresponding JDBC driver must be specified.
      • Note that: The JDBC datasource is powered by HikariCP; for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | +| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store.
| string | 1.6.0 | ### Metrics @@ -405,7 +384,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | Key | Default | Meaning | Type | Since | |--------------------------------------------------|---------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.operation.getTables.ignoreTableProperties | false | Speed up the `GetTables` operation by ignoring `tableTypes` query criteria, and returning table identities only. | boolean | 1.8.0 | +| kyuubi.operation.getTables.ignoreTableProperties | false | Speed up the `GetTables` operation by returning table identities only. | boolean | 1.8.0 | | kyuubi.operation.idle.timeout | PT3H | Operation will be closed when it's not accessed for this duration of time | duration | 1.0.0 | | kyuubi.operation.interrupt.on.cancel | true | When true, all running tasks will be interrupted if one cancels a query. When false, all running tasks will remain until finished. | boolean | 1.2.0 | | kyuubi.operation.language | SQL | Choose a programing language for the following inputs
        • SQL: (Default) Run all following statements as SQL queries.
        • SCALA: Run all following input as scala codes
        • PYTHON: (Experimental) Run all following input as Python codes with Spark engine
        | string | 1.5.0 | @@ -435,7 +414,6 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.server.limit.batch.connections.per.user | <undefined> | Maximum kyuubi server batch connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.7.0 | | kyuubi.server.limit.batch.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server batch connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. | int | 1.7.0 | | kyuubi.server.limit.client.fetch.max.rows | <undefined> | Max rows limit for getting result row set operation. If the max rows specified by client-side is larger than the limit, request will fail directly. | int | 1.8.0 | -| kyuubi.server.limit.connections.ip.deny.list || The client ip in the deny list will be denied to connect to kyuubi server. | set | 1.9.1 | | kyuubi.server.limit.connections.per.ipaddress | <undefined> | Maximum kyuubi server connections per ipaddress. Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | | kyuubi.server.limit.connections.per.user | <undefined> | Maximum kyuubi server connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | | kyuubi.server.limit.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. 
| int | 1.6.0 | @@ -447,58 +425,56 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Session -| Key | Default | Meaning | Type | Since | -|---------------------------------------------------------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.session.check.interval | PT5M | The check interval for session timeout. | duration | 1.0.0 | -| kyuubi.session.close.on.disconnect | true | Session will be closed when client disconnects from kyuubi gateway. Set this to false to have session outlive its parent connection. | boolean | 1.8.0 | -| kyuubi.session.conf.advisor | <undefined> | A config advisor plugin for Kyuubi Server. This plugin can provide a list of custom configs for different users or session configs and overwrite the session configs before opening a new session. This config value should be a subclass of `org.apache.kyuubi.plugin.SessionConfAdvisor` which has a zero-arg constructor. | seq | 1.5.0 | -| kyuubi.session.conf.file.reload.interval | PT10M | When `FileSessionConfAdvisor` is used, this configuration defines the expired time of `$KYUUBI_CONF_DIR/kyuubi-session-.conf` in the cache. After exceeding this value, the file will be reloaded. | duration | 1.7.0 | -| kyuubi.session.conf.ignore.list || A comma-separated list of ignored keys. If the client connection contains any of them, the key and the corresponding value will be removed silently during engine bootstrap and connection setup. 
Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | set | 1.2.0 | -| kyuubi.session.conf.profile | <undefined> | Specify a profile to load session-level configurations from `$KYUUBI_CONF_DIR/kyuubi-session-.conf`. This configuration will be ignored if the file does not exist. This configuration only takes effect when `kyuubi.session.conf.advisor` is set as `org.apache.kyuubi.session.FileSessionConfAdvisor`. | string | 1.7.0 | -| kyuubi.session.conf.restrict.list || A comma-separated list of restricted keys. If the client connection contains any of them, the connection will be rejected explicitly during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | set | 1.2.0 | -| kyuubi.session.engine.alive.max.failures | 3 | The maximum number of failures allowed for the engine. | int | 1.8.1 | -| kyuubi.session.engine.alive.probe.enabled | false | Whether to enable the engine alive probe, it true, we will create a companion thrift client that keeps sending simple requests to check whether the engine is alive. | boolean | 1.6.0 | -| kyuubi.session.engine.alive.probe.interval | PT10S | The interval for engine alive probe. | duration | 1.6.0 | -| kyuubi.session.engine.alive.timeout | PT2M | The timeout for engine alive. If there is no alive probe success in the last timeout window, the engine will be marked as no-alive. | duration | 1.6.0 | -| kyuubi.session.engine.check.interval | PT1M | The check interval for engine timeout | duration | 1.0.0 | -| kyuubi.session.engine.flink.fetch.timeout | <undefined> | Result fetch timeout for Flink engine. If the timeout is reached, the result fetch would be stopped and the current fetched would be returned. 
If no data are fetched, a TimeoutException would be thrown. | duration | 1.8.0 | -| kyuubi.session.engine.flink.initialize.sql || The initialize sql for Flink session. It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | -| kyuubi.session.engine.flink.main.resource | <undefined> | The package used to create Flink SQL engine remote job. If it is undefined, Kyuubi will use the default | string | 1.4.0 | -| kyuubi.session.engine.flink.max.rows | 1000000 | Max rows of Flink query results. For batch queries, rows exceeding the limit would be ignored. For streaming queries, the query would be canceled if the limit is reached. | int | 1.5.0 | -| kyuubi.session.engine.hive.main.resource | <undefined> | The package used to create Hive engine remote job. If it is undefined, Kyuubi will use the default | string | 1.6.0 | -| kyuubi.session.engine.idle.timeout | PT30M | engine timeout, the engine will self-terminate when it's not accessed for this duration. 0 or negative means not to self-terminate. | duration | 1.0.0 | -| kyuubi.session.engine.initialize.timeout | PT3M | Timeout for starting the background engine, e.g. SparkSQLEngine. | duration | 1.0.0 | -| kyuubi.session.engine.launch.async | true | When opening kyuubi session, whether to launch the backend engine asynchronously. When true, the Kyuubi server will set up the connection with the client without delay as the backend engine will be created asynchronously. | boolean | 1.4.0 | -| kyuubi.session.engine.log.timeout | PT24H | If we use Spark as the engine then the session submit log is the console output of spark-submit. We will retain the session submit log until over the config value. | duration | 1.1.0 | -| kyuubi.session.engine.login.timeout | PT15S | The timeout of creating the connection to remote sql query engine | duration | 1.0.0 | -| kyuubi.session.engine.open.max.attempts | 9 | The number of times an open engine will retry when encountering a special error. 
| int | 1.7.0 | -| kyuubi.session.engine.open.onFailure | RETRY | The behavior when opening engine failed:
        • RETRY: retry to open engine for kyuubi.session.engine.open.max.attempts times.
        • DEREGISTER_IMMEDIATELY: deregister the engine immediately.
        • DEREGISTER_AFTER_RETRY: deregister the engine after retry to open engine for kyuubi.session.engine.open.max.attempts times.
        | string | 1.8.1 | -| kyuubi.session.engine.open.retry.wait | PT10S | How long to wait before retrying to open the engine after failure. | duration | 1.7.0 | -| kyuubi.session.engine.share.level | USER | (deprecated) - Using kyuubi.engine.share.level instead | string | 1.0.0 | -| kyuubi.session.engine.spark.initialize.sql || The initialize sql for Spark session. It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | -| kyuubi.session.engine.spark.main.resource | <undefined> | The package used to create Spark SQL engine remote application. If it is undefined, Kyuubi will use the default | string | 1.0.0 | -| kyuubi.session.engine.spark.max.initial.wait | PT1M | Max wait time for the initial connection to Spark engine. The engine will self-terminate no new incoming connection is established within this time. This setting only applies at the CONNECTION share level. 0 or negative means not to self-terminate. | duration | 1.8.0 | -| kyuubi.session.engine.spark.max.lifetime | PT0S | Max lifetime for Spark engine, the engine will self-terminate when it reaches the end of life. 0 or negative means not to self-terminate. | duration | 1.6.0 | -| kyuubi.session.engine.spark.max.lifetime.gracefulPeriod | PT0S | Graceful period for Spark engine to wait the connections disconnected after reaching the end of life. After the graceful period, all the connections without running operations will be forcibly disconnected. 0 or negative means always waiting the connections disconnected. | duration | 1.8.1 | -| kyuubi.session.engine.spark.progress.timeFormat | yyyy-MM-dd HH:mm:ss.SSS | The time format of the progress bar | string | 1.6.0 | -| kyuubi.session.engine.spark.progress.update.interval | PT1S | Update period of progress bar. | duration | 1.6.0 | -| kyuubi.session.engine.spark.showProgress | false | When true, show the progress bar in the Spark's engine log. 
| boolean | 1.6.0 | -| kyuubi.session.engine.startup.destroy.timeout | PT5S | Engine startup process destroy wait time, if the process does not stop after this time, force destroy instead. This configuration only takes effect when `kyuubi.session.engine.startup.waitCompletion=false`. | duration | 1.8.0 | -| kyuubi.session.engine.startup.error.max.size | 8192 | During engine bootstrapping, if anderror occurs, using this config to limit the length of error message(characters). | int | 1.1.0 | -| kyuubi.session.engine.startup.maxLogLines | 10 | The maximum number of engine log lines when errors occur during the engine startup phase. Note that this config effects on client-side to help track engine startup issues. | int | 1.4.0 | -| kyuubi.session.engine.startup.waitCompletion | true | Whether to wait for completion after the engine starts. If false, the startup process will be destroyed after the engine is started. Note that only use it when the driver is not running locally, such as in yarn-cluster mode; Otherwise, the engine will be killed. | boolean | 1.5.0 | -| kyuubi.session.engine.trino.connection.catalog | <undefined> | The default catalog that Trino engine will connect to | string | 1.5.0 | -| kyuubi.session.engine.trino.connection.url | <undefined> | The server url that Trino engine will connect to | string | 1.5.0 | -| kyuubi.session.engine.trino.main.resource | <undefined> | The package used to create Trino engine remote job. If it is undefined, Kyuubi will use the default | string | 1.5.0 | -| kyuubi.session.engine.trino.showProgress | true | When true, show the progress bar and final info in the Trino engine log. | boolean | 1.6.0 | -| kyuubi.session.engine.trino.showProgress.debug | false | When true, show the progress debug info in the Trino engine log. | boolean | 1.6.0 | -| kyuubi.session.group.provider | hadoop | A group provider plugin for Kyuubi Server. 
This plugin can provide primary group and groups information for different users or session configs. This config value should be a subclass of `org.apache.kyuubi.plugin.GroupProvider` which has a zero-arg constructor. Kyuubi provides the following built-in implementations:
      • hadoop: delegate the user group mapping to hadoop UserGroupInformation.
      • | string | 1.7.0 | -| kyuubi.session.idle.timeout | PT6H | session idle timeout, it will be closed when it's not accessed for this duration | duration | 1.2.0 | -| kyuubi.session.local.dir.allow.list || The local dir list that are allowed to access by the kyuubi session application. End-users might set some parameters such as `spark.files` and it will upload some local files when launching the kyuubi engine, if the local dir allow list is defined, kyuubi will check whether the path to upload is in the allow list. Note that, if it is empty, there is no limitation for that. And please use absolute paths. | set | 1.6.0 | -| kyuubi.session.name | <undefined> | A human readable name of the session and we use empty string by default. This name will be recorded in the event. Note that, we only apply this value from session conf. | string | 1.4.0 | -| kyuubi.session.proxy.user | <undefined> | An alternative to hive.server2.proxy.user. The current behavior is consistent with hive.server2.proxy.user and now only takes effect in RESTFul API. When both parameters are set, kyuubi.session.proxy.user takes precedence. | string | 1.9.0 | -| kyuubi.session.timeout | PT6H | (deprecated)session timeout, it will be closed when it's not accessed for this duration | duration | 1.0.0 | -| kyuubi.session.user.sign.enabled | false | Whether to verify the integrity of session user name on the engine side, e.g. Authz plugin in Spark. 
| boolean | 1.7.0 | +| Key | Default | Meaning | Type | Since | +|------------------------------------------------------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.session.check.interval | PT5M | The check interval for session timeout. | duration | 1.0.0 | +| kyuubi.session.close.on.disconnect | true | Session will be closed when client disconnects from kyuubi gateway. Set this to false to have session outlive its parent connection. | boolean | 1.8.0 | +| kyuubi.session.conf.advisor | <undefined> | A config advisor plugin for Kyuubi Server. This plugin can provide a list of custom configs for different users or session configs and overwrite the session configs before opening a new session. This config value should be a subclass of `org.apache.kyuubi.plugin.SessionConfAdvisor` which has a zero-arg constructor. | seq | 1.5.0 | +| kyuubi.session.conf.file.reload.interval | PT10M | When `FileSessionConfAdvisor` is used, this configuration defines the expired time of `$KYUUBI_CONF_DIR/kyuubi-session-.conf` in the cache. After exceeding this value, the file will be reloaded. | duration | 1.7.0 | +| kyuubi.session.conf.ignore.list || A comma-separated list of ignored keys. If the client connection contains any of them, the key and the corresponding value will be removed silently during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. 
| set | 1.2.0 | +| kyuubi.session.conf.profile | <undefined> | Specify a profile to load session-level configurations from `$KYUUBI_CONF_DIR/kyuubi-session-.conf`. This configuration will be ignored if the file does not exist. This configuration only takes effect when `kyuubi.session.conf.advisor` is set as `org.apache.kyuubi.session.FileSessionConfAdvisor`. | string | 1.7.0 | +| kyuubi.session.conf.restrict.list || A comma-separated list of restricted keys. If the client connection contains any of them, the connection will be rejected explicitly during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | set | 1.2.0 | +| kyuubi.session.engine.alive.max.failures | 3 | The maximum number of failures allowed for the engine. | int | 1.8.0 | +| kyuubi.session.engine.alive.probe.enabled | false | Whether to enable the engine alive probe, it true, we will create a companion thrift client that keeps sending simple requests to check whether the engine is alive. | boolean | 1.6.0 | +| kyuubi.session.engine.alive.probe.interval | PT10S | The interval for engine alive probe. | duration | 1.6.0 | +| kyuubi.session.engine.alive.timeout | PT2M | The timeout for engine alive. If there is no alive probe success in the last timeout window, the engine will be marked as no-alive. | duration | 1.6.0 | +| kyuubi.session.engine.check.interval | PT1M | The check interval for engine timeout | duration | 1.0.0 | +| kyuubi.session.engine.flink.fetch.timeout | <undefined> | Result fetch timeout for Flink engine. If the timeout is reached, the result fetch would be stopped and the current fetched would be returned. If no data are fetched, a TimeoutException would be thrown. | duration | 1.8.0 | +| kyuubi.session.engine.flink.initialize.sql || The initialize sql for Flink session. 
It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | +| kyuubi.session.engine.flink.main.resource | <undefined> | The package used to create Flink SQL engine remote job. If it is undefined, Kyuubi will use the default | string | 1.4.0 | +| kyuubi.session.engine.flink.max.rows | 1000000 | Max rows of Flink query results. For batch queries, rows exceeding the limit would be ignored. For streaming queries, the query would be canceled if the limit is reached. | int | 1.5.0 | +| kyuubi.session.engine.hive.main.resource | <undefined> | The package used to create Hive engine remote job. If it is undefined, Kyuubi will use the default | string | 1.6.0 | +| kyuubi.session.engine.idle.timeout | PT30M | engine timeout, the engine will self-terminate when it's not accessed for this duration. 0 or negative means not to self-terminate. | duration | 1.0.0 | +| kyuubi.session.engine.initialize.timeout | PT3M | Timeout for starting the background engine, e.g. SparkSQLEngine. | duration | 1.0.0 | +| kyuubi.session.engine.launch.async | true | When opening kyuubi session, whether to launch the backend engine asynchronously. When true, the Kyuubi server will set up the connection with the client without delay as the backend engine will be created asynchronously. | boolean | 1.4.0 | +| kyuubi.session.engine.log.timeout | PT24H | If we use Spark as the engine then the session submit log is the console output of spark-submit. We will retain the session submit log until over the config value. | duration | 1.1.0 | +| kyuubi.session.engine.login.timeout | PT15S | The timeout of creating the connection to remote sql query engine | duration | 1.0.0 | +| kyuubi.session.engine.open.max.attempts | 9 | The number of times an open engine will retry when encountering a special error. | int | 1.7.0 | +| kyuubi.session.engine.open.retry.wait | PT10S | How long to wait before retrying to open the engine after failure. 
| duration | 1.7.0 | +| kyuubi.session.engine.share.level | USER | (deprecated) - Using kyuubi.engine.share.level instead | string | 1.0.0 | +| kyuubi.session.engine.spark.initialize.sql || The initialize sql for Spark session. It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | +| kyuubi.session.engine.spark.main.resource | <undefined> | The package used to create Spark SQL engine remote application. If it is undefined, Kyuubi will use the default | string | 1.0.0 | +| kyuubi.session.engine.spark.max.initial.wait | PT1M | Max wait time for the initial connection to Spark engine. The engine will self-terminate if no new incoming connection is established within this time. This setting only applies at the CONNECTION share level. 0 or negative means not to self-terminate. | duration | 1.8.0 | +| kyuubi.session.engine.spark.max.lifetime | PT0S | Max lifetime for Spark engine, the engine will self-terminate when it reaches the end of life. 0 or negative means not to self-terminate. | duration | 1.6.0 | +| kyuubi.session.engine.spark.progress.timeFormat | yyyy-MM-dd HH:mm:ss.SSS | The time format of the progress bar | string | 1.6.0 | +| kyuubi.session.engine.spark.progress.update.interval | PT1S | Update period of progress bar. | duration | 1.6.0 | +| kyuubi.session.engine.spark.showProgress | false | When true, show the progress bar in the Spark's engine log. | boolean | 1.6.0 | +| kyuubi.session.engine.startup.destroy.timeout | PT5S | Engine startup process destroy wait time, if the process does not stop after this time, force destroy instead. This configuration only takes effect when `kyuubi.session.engine.startup.waitCompletion=false`. | duration | 1.8.0 | +| kyuubi.session.engine.startup.error.max.size | 8192 | During engine bootstrapping, if an error occurs, using this config to limit the length of error message(characters).
| int | 1.1.0 | +| kyuubi.session.engine.startup.maxLogLines | 10 | The maximum number of engine log lines when errors occur during the engine startup phase. Note that this config effects on client-side to help track engine startup issues. | int | 1.4.0 | +| kyuubi.session.engine.startup.waitCompletion | true | Whether to wait for completion after the engine starts. If false, the startup process will be destroyed after the engine is started. Note that only use it when the driver is not running locally, such as in yarn-cluster mode; Otherwise, the engine will be killed. | boolean | 1.5.0 | +| kyuubi.session.engine.trino.connection.catalog | <undefined> | The default catalog that Trino engine will connect to | string | 1.5.0 | +| kyuubi.session.engine.trino.connection.url | <undefined> | The server url that Trino engine will connect to | string | 1.5.0 | +| kyuubi.session.engine.trino.main.resource | <undefined> | The package used to create Trino engine remote job. If it is undefined, Kyuubi will use the default | string | 1.5.0 | +| kyuubi.session.engine.trino.showProgress | true | When true, show the progress bar and final info in the Trino engine log. | boolean | 1.6.0 | +| kyuubi.session.engine.trino.showProgress.debug | false | When true, show the progress debug info in the Trino engine log. | boolean | 1.6.0 | +| kyuubi.session.group.provider | hadoop | A group provider plugin for Kyuubi Server. This plugin can provide primary group and groups information for different users or session configs. This config value should be a subclass of `org.apache.kyuubi.plugin.GroupProvider` which has a zero-arg constructor. Kyuubi provides the following built-in implementations:
      • hadoop: delegate the user group mapping to hadoop UserGroupInformation.
      • | string | 1.7.0 | +| kyuubi.session.idle.timeout | PT6H | session idle timeout, it will be closed when it's not accessed for this duration | duration | 1.2.0 | +| kyuubi.session.local.dir.allow.list || The local dir list that are allowed to access by the kyuubi session application. End-users might set some parameters such as `spark.files` and it will upload some local files when launching the kyuubi engine, if the local dir allow list is defined, kyuubi will check whether the path to upload is in the allow list. Note that, if it is empty, there is no limitation for that. And please use absolute paths. | set | 1.6.0 | +| kyuubi.session.name | <undefined> | A human readable name of the session and we use empty string by default. This name will be recorded in the event. Note that, we only apply this value from session conf. | string | 1.4.0 | +| kyuubi.session.proxy.user | <undefined> | An alternative to hive.server2.proxy.user. The current behavior is consistent with hive.server2.proxy.user and now only takes effect in RESTFul API. When both parameters are set, kyuubi.session.proxy.user takes precedence. | string | 1.9.0 | +| kyuubi.session.timeout | PT6H | (deprecated)session timeout, it will be closed when it's not accessed for this duration | duration | 1.0.0 | +| kyuubi.session.user.sign.enabled | false | Whether to verify the integrity of session user name on the engine side, e.g. Authz plugin in Spark. | boolean | 1.7.0 | ### Spnego @@ -523,11 +499,11 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.zookeeper.embedded.client.use.hostname | false | When true, embedded Zookeeper prefer to bind hostname, otherwise, ip address. | boolean | 1.7.2 | | kyuubi.zookeeper.embedded.data.dir | embedded_zookeeper | dataDir for the embedded zookeeper server where stores the in-memory database snapshots and, unless specified otherwise, the transaction log of updates to the database. 
If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.2.0 | | kyuubi.zookeeper.embedded.data.log.dir | embedded_zookeeper | dataLogDir for the embedded ZooKeeper server where writes the transaction log. If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.2.0 | -| kyuubi.zookeeper.embedded.directory | embedded_zookeeper | (deprecated) The temporary directory for the embedded ZooKeeper server. If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.0.0 | +| kyuubi.zookeeper.embedded.directory | embedded_zookeeper | The temporary directory for the embedded ZooKeeper server. If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.0.0 | | kyuubi.zookeeper.embedded.max.client.connections | 120 | maxClientCnxns for the embedded ZooKeeper server to limit the number of concurrent connections of a single client identified by IP address | int | 1.2.0 | | kyuubi.zookeeper.embedded.max.session.timeout | 60000 | maxSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 20 times the tickTime | int | 1.2.0 | | kyuubi.zookeeper.embedded.min.session.timeout | 6000 | minSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 2 times the tickTime | int | 1.2.0 | -| kyuubi.zookeeper.embedded.port | 2181 | (deprecated) The port of the embedded ZooKeeper server | int | 1.0.0 | +| kyuubi.zookeeper.embedded.port | 2181 | The port of the embedded ZooKeeper server | int | 1.0.0 | | kyuubi.zookeeper.embedded.tick.time | 3000 | tickTime in milliseconds for the embedded ZooKeeper server | int | 1.2.0 | ## Spark Configurations @@ -542,11 +518,7 @@ Setting them in `$KYUUBI_HOME/conf/kyuubi-defaults.conf` supplies with default v ### Via JDBC Connection URL -Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. 
For example: - -``` -jdbc:hive2://localhost:10009/default;#spark.sql.shuffle.partitions=2;spark.executor.memory=5g -``` +Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default;#spark.sql.shuffle.partitions=2;spark.executor.memory=5g``` - **Runtime SQL Configuration** - For [Runtime SQL Configurations](https://spark.apache.org/docs/latest/configuration.html#runtime-sql-configuration), they will take affect every time @@ -578,11 +550,7 @@ The below options in `kyuubi-defaults.conf` will set `parallelism.default: 2` an ### Via JDBC Connection URL -Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: - -``` -jdbc:hive2://localhost:10009/default;#flink.parallelism.default=2;flink.taskmanager.memory.process.size=5g -``` +Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default;#flink.parallelism.default=2;flink.taskmanager.memory.process.size=5g``` ### Via SET Statements @@ -609,11 +577,7 @@ The below options in `kyuubi-defaults.conf` will set `query_max_stage_count: 500 ### Via JDBC Connection URL -Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: - -``` -jdbc:hive2://localhost:10009/default;#trino.query_max_stage_count=500;trino.parse_decimal_literals_as_double=true -``` +Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. 
For example: ```jdbc:hive2://localhost:10009/default;#trino.query_max_stage_count=500;trino.parse_decimal_literals_as_double=true``` ### Via SET Statements diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala index d1331cd0284..ffc45404f5e 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala @@ -43,7 +43,7 @@ import org.apache.kyuubi.engine.spark.events.{EngineEvent, EngineEventsStore, Sp import org.apache.kyuubi.engine.spark.session.{SparkSessionImpl, SparkSQLSessionManager} import org.apache.kyuubi.events.EventBus import org.apache.kyuubi.ha.HighAvailabilityConf._ -import org.apache.kyuubi.ha.client.RetryPolicies +import org.apache.kyuubi.ha.client.{DiscoveryClientProvider, RetryPolicies} import org.apache.kyuubi.service.Serverable import org.apache.kyuubi.session.SessionHandle import org.apache.kyuubi.util.{SignalRegister, ThreadUtils} @@ -61,6 +61,7 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin @volatile private var stopEngineExec: Option[ThreadPoolExecutor] = None private lazy val engineSavePath = backendService.sessionManager.asInstanceOf[SparkSQLSessionManager].getEngineResultSavePath() + @volatile private var metricsReporter: Option[ScheduledExecutorService] = None override def initialize(conf: KyuubiConf): Unit = { val listener = new SparkSQLEngineListener(this) @@ -97,6 +98,15 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin fs.mkdirs(path) fs.deleteOnExit(path) } + + if (conf.get(ENGINE_POOL_SELECT_POLICY) == "ADAPTIVE") { + val subdomain = conf.get(ENGINE_SHARE_LEVEL_SUBDOMAIN) + val shareLevel = conf.get(ENGINE_SHARE_LEVEL) + val enginePoolIgnoreSubdomain 
= conf.get(ENGINE_POOL_IGNORE_SUBDOMAIN) + if (!"CONNECTION".equals(shareLevel) && (subdomain.isEmpty || enginePoolIgnoreSubdomain)) { + startMetricsReporter() + } + } } override def stop(): Unit = if (shutdown.compareAndSet(false, true)) { @@ -112,6 +122,11 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin exec, Duration(60, TimeUnit.SECONDS)) }) + metricsReporter.foreach(reporter => { + ThreadUtils.shutdown( + reporter, + Duration(60, TimeUnit.SECONDS)) + }) try { val path = new Path(engineSavePath) val fs = path.getFileSystem(spark.sparkContext.hadoopConfiguration) @@ -161,6 +176,42 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin } } + private[kyuubi] def startMetricsReporter(): Unit = { + val interval = conf.get(ENGINE_REPORT_INTERVAL) + val engineSpace = conf.get(HA_NAMESPACE) + val statusTracker = spark.sparkContext.statusTracker + val metricsSpace = s"/metrics$engineSpace" + val report: Runnable = () => { + if (!shutdown.get) { + val openSessionCount = backendService.sessionManager.getOpenSessionCount + val activeTask = statusTracker.getActiveStageIds() + .flatMap { stage => + statusTracker.getStageInfo(stage).map(_.numActiveTasks) + }.sum + val engineMetrics = Map( + "openSessionCount" -> openSessionCount, + "activeTask" -> activeTask, + "poolId" -> engineSpace.split("-").last) + info(s"Spark engine has $openSessionCount open sessions and $activeTask active tasks.") + DiscoveryClientProvider.withDiscoveryClient(conf) { client => + if (client.pathNonExists(metricsSpace)) { + client.create(metricsSpace, "PERSISTENT") + } + client.setData( + s"/metrics$engineSpace", + engineMetrics.map { case (k, v) => s"$k=$v" }.mkString(";").getBytes) + } + } + } + metricsReporter = + Some(ThreadUtils.newDaemonSingleThreadScheduledExecutor("spark-engine-metrics-reporter")) + metricsReporter.get.scheduleWithFixedDelay( + report, + interval, + interval, + TimeUnit.MILLISECONDS) + } + override protected def 
stopServer(): Unit = { countDownLatch.countDown() } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index bf547c5ff77..7a2b324efba 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -2260,13 +2260,30 @@ object KyuubiConf { "a session.
          " + "
        • RANDOM - Randomly use the engine in the pool
        • " + "
        • POLLING - Polling use the engine in the pool
        • " + + "
        • ADAPTIVE - ADAPTIVE use the engine in the pool
        • " + "
        ") .version("1.7.0") .stringConf .transformToUpperCase - .checkValues(Set("RANDOM", "POLLING")) + .checkValues(Set("RANDOM", "POLLING", "ADAPTIVE")) .createWithDefault("RANDOM") + val ENGINE_POOL_ADAPTIVE_SESSION_THRESHOLD: ConfigEntry[Int] = + buildConf("kyuubi.engine.pool.adaptive.session.threshold") + .doc("The threshold of a engine open session count for adaptive engine pool select policy.") + .version("1.9.0") + .intConf + .checkValue(_ >= 1, "must be positive number") + .createWithDefault(10) + + val ENGINE_REPORT_INTERVAL: ConfigEntry[Long] = + buildConf("kyuubi.engine.report.interval") + .doc("The check interval for engine report to the server") + .version("1.9.0") + .timeConf + .checkValue(_ >= Duration.ofSeconds(1).toMillis, "Minimum 1 seconds") + .createWithDefault(Duration.ofMinutes(1).toMillis) + val ENGINE_INITIALIZE_SQL: ConfigEntry[Seq[String]] = buildConf("kyuubi.engine.initialize.sql") .doc("SemiColon-separated list of SQL statements to be initialized in the newly created " + diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/EngineServiceDiscovery.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/EngineServiceDiscovery.scala index 88388f3ba8a..88b487f9ce3 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/EngineServiceDiscovery.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/EngineServiceDiscovery.scala @@ -20,6 +20,7 @@ package org.apache.kyuubi.ha.client import scala.util.control.NonFatal import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL +import org.apache.kyuubi.ha.HighAvailabilityConf.HA_NAMESPACE import org.apache.kyuubi.service.FrontendService /** @@ -33,6 +34,10 @@ class EngineServiceDiscovery( override def stop(): Unit = synchronized { if (!isServerLost.get()) { discoveryClient.deregisterService() + val path = s"/metrics${conf.get(HA_NAMESPACE)}" + if (discoveryClient.pathExists(path)) { + discoveryClient.delete(path) + } conf.get(ENGINE_SHARE_LEVEL) match { // 
For connection level, we should clean up the namespace in zk in case the disk stress. case "CONNECTION" => diff --git a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClientSuite.scala b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClientSuite.scala index 34ed0559383..1326adc6e3f 100644 --- a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClientSuite.scala +++ b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/zookeeper/ZookeeperDiscoveryClientSuite.scala @@ -224,7 +224,6 @@ abstract class ZookeeperDiscoveryClientSuite extends DiscoveryClientTests } } finally { service.stop() - discovery.stop() } } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala index bb7f7ecbcf4..b41d7aae392 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala @@ -119,6 +119,10 @@ private[kyuubi] class EngineRef( DiscoveryClientProvider.withDiscoveryClient(conf) { client => client.getAndIncrement(snPath) } + + case "ADAPTIVE" => + getPoolId(clientPoolSize) + case "RANDOM" => Random.nextInt(poolSize) } @@ -373,4 +377,49 @@ private[kyuubi] class EngineRef( } } } + + def getPoolId(poolSize: Int): Int = { + val sessionThreshold = conf.get(ENGINE_POOL_ADAPTIVE_SESSION_THRESHOLD) + val metricsSpace = + s"/metrics/${serverSpace}_${KYUUBI_VERSION}_${shareLevel}_${engineType}/$user" + DiscoveryClientProvider.withDiscoveryClient(conf) { client => + tryWithLock(client) { + if (client.pathNonExists(metricsSpace)) { + client.create(metricsSpace, "PERSISTENT") + } + } + val metrics = client.getChildren(metricsSpace) + if (metrics.isEmpty) { + return Random.nextInt(poolSize) + } else { + engineType match { + case SPARK_SQL => + val engineMetricsMap = metrics.map(p => + new 
String(client.getData(s"$metricsSpace/$p")) + .split(";") + .map(_.split("=", 2)) + .filter(_.length == 2) + .map(kv => (kv.head, kv.last.toInt)) + .toMap) + if (engineMetricsMap.isEmpty) { + return Random.nextInt(poolSize) + } + val sortedEngineMetrics = + engineMetricsMap.sortBy(map => + ( + map.getOrElse("openSessionCount", sessionThreshold), + map.getOrElse("activeTask", 0))) + val candidate = sortedEngineMetrics.head + if (candidate.contains("poolId") && (candidate( + "openSessionCount") < sessionThreshold || metrics.size == poolSize)) { + candidate("poolId") + } else { + Random.nextInt(poolSize) + } + // TODO: other engine support adaptive + case _ => Random.nextInt(poolSize) + } + } + } + } } From c4ae3da169175adad5c966571b2f89909cbd5c6b Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Fri, 10 Nov 2023 17:19:31 +0800 Subject: [PATCH 2/4] else --- .../org/apache/kyuubi/engine/EngineRef.scala | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala index b41d7aae392..bde0f1ab48a 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala @@ -121,7 +121,7 @@ private[kyuubi] class EngineRef( } case "ADAPTIVE" => - getPoolId(clientPoolSize) + getAdaptivePoolId(clientPoolSize) case "RANDOM" => Random.nextInt(poolSize) @@ -378,7 +378,7 @@ private[kyuubi] class EngineRef( } } - def getPoolId(poolSize: Int): Int = { + def getAdaptivePoolId(poolSize: Int): Int = { val sessionThreshold = conf.get(ENGINE_POOL_ADAPTIVE_SESSION_THRESHOLD) val metricsSpace = s"/metrics/${serverSpace}_${KYUUBI_VERSION}_${shareLevel}_${engineType}/$user" @@ -403,18 +403,19 @@ private[kyuubi] class EngineRef( .toMap) if (engineMetricsMap.isEmpty) { return Random.nextInt(poolSize) - } - val sortedEngineMetrics = - 
engineMetricsMap.sortBy(map => + } else { + val sortedEngineMetrics = engineMetricsMap.sortBy { map => ( map.getOrElse("openSessionCount", sessionThreshold), - map.getOrElse("activeTask", 0))) - val candidate = sortedEngineMetrics.head - if (candidate.contains("poolId") && (candidate( - "openSessionCount") < sessionThreshold || metrics.size == poolSize)) { - candidate("poolId") - } else { - Random.nextInt(poolSize) + map.getOrElse("activeTask", 0)) + } + val candidate = sortedEngineMetrics.head + if (candidate.contains("poolId") && (candidate( + "openSessionCount") < sessionThreshold || metrics.size == poolSize)) { + candidate("poolId") + } else { + Random.nextInt(poolSize) + } } // TODO: other engine support adaptive case _ => Random.nextInt(poolSize) From 80a0d4c9cb620aabcd4e19b7ae6852952965495f Mon Sep 17 00:00:00 2001 From: senmiaoliu Date: Fri, 17 Nov 2023 18:50:17 +0800 Subject: [PATCH 3/4] fix style --- docs/configuration/settings.md | 478 ++++++++++-------- .../kyuubi/engine/spark/SparkSQLEngine.scala | 14 +- .../org/apache/kyuubi/config/KyuubiConf.scala | 4 +- .../org/apache/kyuubi/engine/EngineRef.scala | 38 +- 4 files changed, 285 insertions(+), 249 deletions(-) diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index 44bc3623142..61d6031c08c 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -33,7 +33,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | Key | Default | Meaning | Type | Since | 
|-----------------------------------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------|-------| -| kyuubi.authentication | NONE | A comma-separated list of client authentication types.
        • NOSASL: raw transport.
        • NONE: no authentication check.
        • KERBEROS: Kerberos/GSSAPI authentication.
        • CUSTOM: User-defined authentication.
        • JDBC: JDBC query authentication.
        • LDAP: Lightweight Directory Access Protocol authentication.
        The following tree describes the catalog of each option.
        • NOSASL
        • SASL
          • SASL/PLAIN
            • NONE
            • LDAP
            • JDBC
            • CUSTOM
          • SASL/GSSAPI
            • KERBEROS
        Note that: for SASL authentication, KERBEROS and PLAIN auth types are supported at the same time, and only the first specified PLAIN auth type is valid. | set | 1.0.0 | +| kyuubi.authentication | NONE | A comma-separated list of client authentication types.
        • NOSASL: raw transport.
        • NONE: no authentication check.
        • KERBEROS: Kerberos/GSSAPI authentication.
        • CUSTOM: User-defined authentication.
        • JDBC: JDBC query authentication.
        • LDAP: Lightweight Directory Access Protocol authentication.
        The following tree describes the catalog of each option.
        • NOSASL
        • SASL
          • SASL/PLAIN
            • NONE
            • LDAP
            • JDBC
            • CUSTOM
          • SASL/GSSAPI
            • KERBEROS
        Note that: for SASL authentication, KERBEROS and PLAIN auth types are supported at the same time, and only the first specified PLAIN auth type is valid. | seq | 1.0.0 | | kyuubi.authentication.custom.class | <undefined> | User-defined authentication implementation of org.apache.kyuubi.service.authentication.PasswdAuthenticationProvider | string | 1.3.0 | | kyuubi.authentication.jdbc.driver.class | <undefined> | Driver class name for JDBC Authentication Provider. | string | 1.6.0 | | kyuubi.authentication.jdbc.password | <undefined> | Database password for JDBC Authentication Provider. | string | 1.6.0 | @@ -120,92 +120,109 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Engine -| Key | Default | Meaning | Type | Since | -|----------------------------------------------------------|---------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.engine.chat.ernie.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the ernie bot server. 
A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | -| kyuubi.engine.chat.ernie.http.proxy | <undefined> | HTTP proxy url for API calling in ernie bot engine. e.g. http://127.0.0.1:1088 | string | 1.9.0 | -| kyuubi.engine.chat.ernie.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after ernie bot server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | -| kyuubi.engine.chat.ernie.model | completions | ID of the model used in ernie bot. Available models are completions_pro, ernie_bot_8k, completions and eb-instant[Model overview](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/6lp69is2a). | string | 1.9.0 | -| kyuubi.engine.chat.ernie.token | <undefined> | The token to access ernie bot open API, which could be got at https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Ilkkrb0i5 | string | 1.9.0 | -| kyuubi.engine.chat.extra.classpath | <undefined> | The extra classpath for the Chat engine, for configuring the location of the SDK and etc. | string | 1.8.0 | -| kyuubi.engine.chat.gpt.apiKey | <undefined> | The key to access OpenAI open API, which could be got at https://platform.openai.com/account/api-keys | string | 1.8.0 | -| kyuubi.engine.chat.gpt.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the Chat GPT server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | -| kyuubi.engine.chat.gpt.http.proxy | <undefined> | HTTP proxy url for API calling in Chat GPT engine. e.g. http://127.0.0.1:1087 | string | 1.8.0 | -| kyuubi.engine.chat.gpt.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after Chat GPT server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | -| kyuubi.engine.chat.gpt.model | gpt-3.5-turbo | ID of the model used in ChatGPT. 
Available models refer to OpenAI's [Model overview](https://platform.openai.com/docs/models/overview). | string | 1.8.0 | -| kyuubi.engine.chat.java.options | <undefined> | The extra Java options for the Chat engine | string | 1.8.0 | -| kyuubi.engine.chat.memory | 1g | The heap memory for the Chat engine | string | 1.8.0 | -| kyuubi.engine.chat.provider | ECHO | The provider for the Chat engine. Candidates:
        • ECHO: simply replies a welcome message.
        • GPT: a.k.a ChatGPT, powered by OpenAI.
        • ERNIE: ErnieBot, powered by Baidu.
        | string | 1.8.0 | -| kyuubi.engine.connection.url.use.hostname | true | (deprecated) When true, the engine registers with hostname to zookeeper. When Spark runs on K8s with cluster mode, set to false to ensure that server can connect to engine | boolean | 1.3.0 | -| kyuubi.engine.deregister.exception.classes || A comma-separated list of exception classes. If there is any exception thrown, whose class matches the specified classes, the engine would deregister itself. | set | 1.2.0 | -| kyuubi.engine.deregister.exception.messages || A comma-separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself. | set | 1.2.0 | -| kyuubi.engine.deregister.exception.ttl | PT30M | Time to live(TTL) for exceptions pattern specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits the kyuubi.engine.deregister.job.max.failures within the TTL, an engine will deregister itself and wait for self-terminated. Otherwise, we suppose that the engine has recovered from temporary failures. | duration | 1.2.0 | -| kyuubi.engine.deregister.job.max.failures | 4 | Number of failures of job before deregistering the engine. | int | 1.2.0 | -| kyuubi.engine.event.json.log.path | file:///tmp/kyuubi/events | The location where all the engine events go for the built-in JSON logger.
        • Local Path: start with 'file://'
        • HDFS Path: start with 'hdfs://'
        | string | 1.3.0 | -| kyuubi.engine.event.loggers | SPARK | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
        • SPARK: the events will be written to the Spark listener bus.
        • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
        • JDBC: to be done
        • CUSTOM: User-defined event handlers.
        Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, the user needs to implement a subclass of `org.apache.kyuubi.events.handler.CustomEventHandlerProvider` which has a zero-arg constructor. | seq | 1.3.0 | -| kyuubi.engine.flink.application.jars | <undefined> | A comma-separated list of the local jars to be shipped with the job to the cluster. For example, SQL UDF jars. Only effective in yarn application mode. | string | 1.8.0 | -| kyuubi.engine.flink.extra.classpath | <undefined> | The extra classpath for the Flink SQL engine, for configuring the location of hadoop client jars, etc. Only effective in yarn session mode. | string | 1.6.0 | -| kyuubi.engine.flink.initialize.sql | SHOW DATABASES | The initialize sql for Flink engine. It fallback to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | -| kyuubi.engine.flink.java.options | <undefined> | The extra Java options for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | -| kyuubi.engine.flink.memory | 1g | The heap memory for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | -| kyuubi.engine.hive.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
        • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
        • JDBC: to be done
        • CUSTOM: to be done.
        | seq | 1.7.0 | -| kyuubi.engine.hive.extra.classpath | <undefined> | The extra classpath for the Hive query engine, for configuring location of the hadoop client jars and etc. | string | 1.6.0 | -| kyuubi.engine.hive.java.options | <undefined> | The extra Java options for the Hive query engine | string | 1.6.0 | -| kyuubi.engine.hive.memory | 1g | The heap memory for the Hive query engine | string | 1.6.0 | -| kyuubi.engine.initialize.sql | SHOW DATABASES | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SHOW DATABASES` to eagerly active HiveClient. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.2.0 | -| kyuubi.engine.jdbc.connection.password | <undefined> | The password is used for connecting to server | string | 1.6.0 | -| kyuubi.engine.jdbc.connection.propagateCredential | false | Whether to use the session's user and password to connect to database | boolean | 1.8.0 | -| kyuubi.engine.jdbc.connection.properties || The additional properties are used for connecting to server | seq | 1.6.0 | -| kyuubi.engine.jdbc.connection.provider | <undefined> | A JDBC connection provider plugin for the Kyuubi Server to establish a connection to the JDBC URL. The configuration value should be a subclass of `org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider`. Kyuubi provides the following built-in implementations:
      • doris: For establishing Doris connections.
      • mysql: For establishing MySQL connections.
      • phoenix: For establishing Phoenix connections.
      • postgresql: For establishing PostgreSQL connections.
      • | string | 1.6.0 | -| kyuubi.engine.jdbc.connection.url | <undefined> | The server url that engine will connect to | string | 1.6.0 | -| kyuubi.engine.jdbc.connection.user | <undefined> | The user is used for connecting to server | string | 1.6.0 | -| kyuubi.engine.jdbc.driver.class | <undefined> | The driver class for JDBC engine connection | string | 1.6.0 | -| kyuubi.engine.jdbc.extra.classpath | <undefined> | The extra classpath for the JDBC query engine, for configuring the location of the JDBC driver and etc. | string | 1.6.0 | -| kyuubi.engine.jdbc.fetch.size | 1000 | The fetch size of JDBC engine | int | 1.9.0 | -| kyuubi.engine.jdbc.initialize.sql | SELECT 1 | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SELECT 1` to eagerly active JDBCClient. | seq | 1.8.0 | -| kyuubi.engine.jdbc.java.options | <undefined> | The extra Java options for the JDBC query engine | string | 1.6.0 | -| kyuubi.engine.jdbc.memory | 1g | The heap memory for the JDBC query engine | string | 1.6.0 | -| kyuubi.engine.jdbc.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. | seq | 1.8.0 | -| kyuubi.engine.jdbc.type | <undefined> | The short name of JDBC type | string | 1.6.0 | -| kyuubi.engine.kubernetes.submit.timeout | PT30S | The engine submit timeout for Kubernetes application. | duration | 1.7.2 | -| kyuubi.engine.operation.convert.catalog.database.enabled | true | When set to true, The engine converts the JDBC methods of set/get Catalog and set/get Schema to the implementation of different engines | boolean | 1.6.0 | -| kyuubi.engine.operation.log.dir.root | engine_operation_logs | Root directory for query operation log at engine-side. | string | 1.4.0 | -| kyuubi.engine.pool.adaptive.session.threshold | 10 | The threshold of a engine open session count for adaptive engine pool select policy. 
| int | 1.9.0 | -| kyuubi.engine.pool.name | engine-pool | The name of the engine pool. | string | 1.5.0 | -| kyuubi.engine.pool.selectPolicy | RANDOM | The select policy of an engine from the corresponding engine pool engine for a session.
        • RANDOM - Randomly use the engine in the pool
        • POLLING - Polling use the engine in the pool
        • ADAPTIVE - ADAPTIVE use the engine in the pool
        | string | 1.7.0 | -| kyuubi.engine.pool.size | -1 | The size of the engine pool. Note that, if the size is less than 1, the engine pool will not be enabled; otherwise, the size of the engine pool will be min(this, kyuubi.engine.pool.size.threshold). | int | 1.4.0 | -| kyuubi.engine.pool.size.threshold | 9 | This parameter is introduced as a server-side parameter controlling the upper limit of the engine pool. | int | 1.4.0 | -| kyuubi.engine.report.interval | PT1M | The check interval for engine report to the server | duration | 1.9.0 | -| kyuubi.engine.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.3.0 | -| kyuubi.engine.share.level | USER | Engines will be shared in different levels, available configs are:
        • CONNECTION: engine will not be shared but only used by the current client connection
        • USER: engine will be shared by all sessions created by a unique username, see also kyuubi.engine.share.level.subdomain
        • GROUP: the engine will be shared by all sessions created by all users belong to the same primary group name. The engine will be launched by the group name as the effective username, so here the group name is in value of special user who is able to visit the computing resources/data of the team. It follows the [Hadoop GroupsMapping](https://reurl.cc/xE61Y5) to map user to a primary group. If the primary group is not found, it fallback to the USER level.
        • SERVER: the App will be shared by Kyuubi servers
        | string | 1.2.0 | -| kyuubi.engine.share.level.sub.domain | <undefined> | (deprecated) - Using kyuubi.engine.share.level.subdomain instead | string | 1.2.0 | -| kyuubi.engine.share.level.subdomain | <undefined> | Allow end-users to create a subdomain for the share level of an engine. A subdomain is a case-insensitive string values that must be a valid zookeeper subpath. For example, for the `USER` share level, an end-user can share a certain engine within a subdomain, not for all of its clients. End-users are free to create multiple engines in the `USER` share level. When disable engine pool, use 'default' if absent. | string | 1.4.0 | -| kyuubi.engine.single.spark.session | false | When set to true, this engine is running in a single session mode. All the JDBC/ODBC connections share the temporary views, function registries, SQL configuration and the current database. | boolean | 1.3.0 | -| kyuubi.engine.spark.event.loggers | SPARK | A comma-separated list of engine loggers, where engine/session/operation etc events go.
        • SPARK: the events will be written to the Spark listener bus.
        • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
        • JDBC: to be done
        • CUSTOM: to be done.
        | seq | 1.7.0 | -| kyuubi.engine.spark.initialize.sql | SHOW DATABASES | The initialize sql for Spark engine. It fallback to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | -| kyuubi.engine.spark.python.env.archive | <undefined> | Portable Python env archive used for Spark engine Python language mode. | string | 1.7.0 | -| kyuubi.engine.spark.python.env.archive.exec.path | bin/python | The Python exec path under the Python env archive. | string | 1.7.0 | -| kyuubi.engine.spark.python.home.archive | <undefined> | Spark archive containing $SPARK_HOME/python directory, which is used to init session Python worker for Python language mode. | string | 1.7.0 | -| kyuubi.engine.submit.timeout | PT30S | Period to tolerant Driver Pod ephemerally invisible after submitting. In some Resource Managers, e.g. K8s, the Driver Pod is not visible immediately after `spark-submit` is returned. | duration | 1.7.1 | -| kyuubi.engine.trino.connection.keystore.password | <undefined> | The keystore password used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.keystore.path | <undefined> | The keystore path used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.keystore.type | <undefined> | The keystore type used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.password | <undefined> | The password used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.truststore.password | <undefined> | The truststore password used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.truststore.path | <undefined> | The truststore path used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.connection.truststore.type | <undefined> | The truststore type used for connecting to trino cluster | string | 1.8.0 | -| kyuubi.engine.trino.event.loggers | JSON | A comma-separated list of engine history loggers, 
where engine/session/operation etc events go.
        • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
        • JDBC: to be done
        • CUSTOM: to be done.
        | seq | 1.7.0 | -| kyuubi.engine.trino.extra.classpath | <undefined> | The extra classpath for the Trino query engine, for configuring other libs which may need by the Trino engine | string | 1.6.0 | -| kyuubi.engine.trino.java.options | <undefined> | The extra Java options for the Trino query engine | string | 1.6.0 | -| kyuubi.engine.trino.memory | 1g | The heap memory for the Trino query engine | string | 1.6.0 | -| kyuubi.engine.type | SPARK_SQL | Specify the detailed engine supported by Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are:
        • SPARK_SQL: specify this engine type will launch a Spark engine which can provide all the capacity of the Apache Spark. Note, it's a default engine type.
        • FLINK_SQL: specify this engine type will launch a Flink engine which can provide all the capacity of the Apache Flink.
        • TRINO: specify this engine type will launch a Trino engine which can provide all the capacity of the Trino.
        • HIVE_SQL: specify this engine type will launch a Hive engine which can provide all the capacity of the Hive Server2.
        • JDBC: specify this engine type will launch a JDBC engine which can forward queries to the database system through the certain JDBC driver, for now, it supports Doris and Phoenix.
        • CHAT: specify this engine type will launch a Chat engine.
        | string | 1.4.0 | -| kyuubi.engine.ui.retainedSessions | 200 | The number of SQL client sessions kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | -| kyuubi.engine.ui.retainedStatements | 200 | The number of statements kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | -| kyuubi.engine.ui.stop.enabled | true | When true, allows Kyuubi engine to be killed from the Spark Web UI. | boolean | 1.3.0 | -| kyuubi.engine.user.isolated.spark.session | true | When set to false, if the engine is running in a group or server share level, all the JDBC/ODBC connections will be isolated against the user. Including the temporary views, function registries, SQL configuration, and the current database. Note that, it does not affect if the share level is connection or user. | boolean | 1.6.0 | -| kyuubi.engine.user.isolated.spark.session.idle.interval | PT1M | The interval to check if the user-isolated Spark session is timeout. | duration | 1.6.0 | -| kyuubi.engine.user.isolated.spark.session.idle.timeout | PT6H | If kyuubi.engine.user.isolated.spark.session is false, we will release the Spark session if its corresponding user is inactive after this configured timeout. | duration | 1.6.0 | -| kyuubi.engine.yarn.submit.timeout | PT30S | The engine submit timeout for YARN application. 
| duration | 1.7.2 | +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------------|---------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------| +| kyuubi.engine.chat.ernie.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the ernie bot server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | +| kyuubi.engine.chat.ernie.http.proxy | <undefined> | HTTP proxy url for API calling in ernie bot engine. e.g. http://127.0.0.1:1088 | string | 1.9.0 | +| kyuubi.engine.chat.ernie.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after ernie bot server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.9.0 | +| kyuubi.engine.chat.ernie.model | completions | ID of the model used in ernie bot. 
Available models are completions_pro, ernie_bot_8k, completions and eb-instant[Model overview](https://cloud.baidu.com/doc/WENXINWORKSHOP/s/6lp69is2a). | string | 1.9.0 | +| kyuubi.engine.chat.ernie.token | <undefined> | The token to access ernie bot open API, which could be got at https://cloud.baidu.com/doc/WENXINWORKSHOP/s/Ilkkrb0i5 | string | 1.9.0 | +| kyuubi.engine.chat.extra.classpath | <undefined> | The extra classpath for the Chat engine, for configuring the location of the SDK and etc. | string | 1.8.0 | +| kyuubi.engine.chat.gpt.apiKey | <undefined> | The key to access OpenAI open API, which could be got at https://platform.openai.com/account/api-keys | string | 1.8.0 | +| kyuubi.engine.chat.gpt.http.connect.timeout | PT2M | The timeout[ms] for establishing the connection with the Chat GPT server. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | +| kyuubi.engine.chat.gpt.http.proxy | <undefined> | HTTP proxy url for API calling in Chat GPT engine. e.g. http://127.0.0.1:1087 | string | 1.8.0 | +| kyuubi.engine.chat.gpt.http.socket.timeout | PT2M | The timeout[ms] for waiting for data packets after Chat GPT server connection is established. A timeout value of zero is interpreted as an infinite timeout. | duration | 1.8.0 | +| kyuubi.engine.chat.gpt.model | gpt-3.5-turbo | ID of the model used in ChatGPT. Available models refer to OpenAI's [Model overview](https://platform.openai.com/docs/models/overview). | string | 1.8.0 | +| kyuubi.engine.chat.java.options | <undefined> | The extra Java options for the Chat engine | string | 1.8.0 | +| kyuubi.engine.chat.memory | 1g | The heap memory for the Chat engine | string | 1.8.0 | +| kyuubi.engine.chat.provider | ECHO | The provider for the Chat engine. Candidates:
        • ECHO: simply replies a welcome message.
        • GPT: a.k.a ChatGPT, powered by OpenAI.
        • ERNIE: ErnieBot, powered by Baidu.
        | string | 1.8.0 | +| kyuubi.engine.connection.url.use.hostname | true | (deprecated) When true, the engine registers with hostname to zookeeper. When Spark runs on K8s with cluster mode, set to false to ensure that server can connect to engine | boolean | 1.3.0 | +| kyuubi.engine.deregister.exception.classes || A comma-separated list of exception classes. If there is any exception thrown, whose class matches the specified classes, the engine would deregister itself. | set | 1.2.0 | +| kyuubi.engine.deregister.exception.messages || A comma-separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself. | set | 1.2.0 | +| kyuubi.engine.deregister.exception.ttl | PT30M | Time to live(TTL) for exceptions pattern specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits the kyuubi.engine.deregister.job.max.failures within the TTL, an engine will deregister itself and wait for self-terminated. Otherwise, we suppose that the engine has recovered from temporary failures. | duration | 1.2.0 | +| kyuubi.engine.deregister.job.max.failures | 4 | Number of failures of job before deregistering the engine. | int | 1.2.0 | +| kyuubi.engine.doAs.enabled | true | Whether to enable user impersonation on launching engine. When enabled, for engines which supports user impersonation, e.g. SPARK, depends on the `kyuubi.engine.share.level`, different users will be used to launch the engine. Otherwise, Kyuubi Server's user will always be used to launch the engine. | boolean | 1.9.0 | +| kyuubi.engine.event.json.log.path | file:///tmp/kyuubi/events | The location where all the engine events go for the built-in JSON logger.
        • Local Path: start with 'file://'
        • HDFS Path: start with 'hdfs://'
        | string | 1.3.0 | +| kyuubi.engine.event.loggers | SPARK | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
        • SPARK: the events will be written to the Spark listener bus.
        • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
        • JDBC: to be done
        • CUSTOM: User-defined event handlers.
        Note that: Kyuubi supports custom event handlers with the Java SPI. To register a custom event handler, the user needs to implement a subclass of `org.apache.kyuubi.events.handler.CustomEventHandlerProvider` which has a zero-arg constructor. | seq | 1.3.0 | +| kyuubi.engine.flink.application.jars | <undefined> | A comma-separated list of the local jars to be shipped with the job to the cluster. For example, SQL UDF jars. Only effective in yarn application mode. | string | 1.8.0 | +| kyuubi.engine.flink.extra.classpath | <undefined> | The extra classpath for the Flink SQL engine, for configuring the location of hadoop client jars, etc. Only effective in yarn session mode. | string | 1.6.0 | +| kyuubi.engine.flink.initialize.sql | SHOW DATABASES | The initialize sql for Flink engine. It fallback to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | +| kyuubi.engine.flink.java.options | <undefined> | The extra Java options for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | +| kyuubi.engine.flink.memory | 1g | The heap memory for the Flink SQL engine. Only effective in yarn session mode. | string | 1.6.0 | +| kyuubi.engine.hive.deploy.mode | LOCAL | Configures the hive engine deploy mode, The value can be 'local', 'yarn'. In local mode, the engine operates on the same node as the KyuubiServer. In YARN mode, the engine runs within the Application Master (AM) container of YARN. | string | 1.9.0 | +| kyuubi.engine.hive.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc events go.
        • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
        • JDBC: to be done
        • CUSTOM: to be done.
        | seq | 1.7.0 | +| kyuubi.engine.hive.extra.classpath | <undefined> | The extra classpath for the Hive query engine, for configuring location of the hadoop client jars and etc. | string | 1.6.0 | +| kyuubi.engine.hive.java.options | <undefined> | The extra Java options for the Hive query engine | string | 1.6.0 | +| kyuubi.engine.hive.memory | 1g | The heap memory for the Hive query engine | string | 1.6.0 | +| kyuubi.engine.initialize.sql | SHOW DATABASES | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SHOW DATABASES` to eagerly active HiveClient. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.2.0 | +| kyuubi.engine.jdbc.connection.password | <undefined> | The password is used for connecting to server | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.propagateCredential | false | Whether to use the session's user and password to connect to database | boolean | 1.8.0 | +| kyuubi.engine.jdbc.connection.properties || The additional properties are used for connecting to server | seq | 1.6.0 | +| kyuubi.engine.jdbc.connection.provider | <undefined> | A JDBC connection provider plugin for the Kyuubi Server to establish a connection to the JDBC URL. The configuration value should be a subclass of `org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider`. Kyuubi provides the following built-in implementations:
      • doris: For establishing Doris connections.
      • mysql: For establishing MySQL connections.
      • phoenix: For establishing Phoenix connections.
      • postgresql: For establishing PostgreSQL connections.
      • starrocks: For establishing StarRocks connections.
      • | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.url | <undefined> | The server url that engine will connect to | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.user | <undefined> | The user is used for connecting to server | string | 1.6.0 | +| kyuubi.engine.jdbc.driver.class | <undefined> | The driver class for JDBC engine connection | string | 1.6.0 | +| kyuubi.engine.jdbc.extra.classpath | <undefined> | The extra classpath for the JDBC query engine, for configuring the location of the JDBC driver and etc. | string | 1.6.0 | +| kyuubi.engine.jdbc.fetch.size | 1000 | The fetch size of JDBC engine | int | 1.9.0 | +| kyuubi.engine.jdbc.initialize.sql | SELECT 1 | SemiColon-separated list of SQL statements to be initialized in the newly created engine before queries. i.e. use `SELECT 1` to eagerly active JDBCClient. | seq | 1.8.0 | +| kyuubi.engine.jdbc.java.options | <undefined> | The extra Java options for the JDBC query engine | string | 1.6.0 | +| kyuubi.engine.jdbc.memory | 1g | The heap memory for the JDBC query engine | string | 1.6.0 | +| kyuubi.engine.jdbc.operation.incremental.collect | false | When true, the result will be sequentially calculated and returned to the JDBC engine. It fallback to `kyuubi.operation.incremental.collect` | boolean | 1.10.0 | +| kyuubi.engine.jdbc.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. | seq | 1.8.0 | +| kyuubi.engine.jdbc.type | <undefined> | The short name of JDBC type | string | 1.6.0 | +| kyuubi.engine.keytab | <undefined> | Kerberos keytab for the kyuubi engine. | string | 1.10.0 | +| kyuubi.engine.kubernetes.submit.timeout | PT30S | The engine submit timeout for Kubernetes application. 
| duration | 1.7.2 | +| kyuubi.engine.operation.convert.catalog.database.enabled | true | When set to true, The engine converts the JDBC methods of set/get Catalog and set/get Schema to the implementation of different engines | boolean | 1.6.0 | +| kyuubi.engine.operation.log.dir.root | engine_operation_logs | Root directory for query operation log at engine-side. | string | 1.4.0 | +| kyuubi.engine.pool.adaptive.session.threshold | 10 | The threshold of a engine open session count for adaptive engine pool select policy. | int | 1.10.0 | +| kyuubi.engine.pool.name | engine-pool | The name of the engine pool. | string | 1.5.0 | +| kyuubi.engine.pool.selectPolicy | RANDOM | The select policy of an engine from the corresponding engine pool engine for a session.
        • RANDOM - Randomly use the engine in the pool
        • POLLING - Use the engines in the pool in turn (round-robin)
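        • ADAPTIVE - Adaptively choose an engine in the pool based on the open session count of each engine, see kyuubi.engine.pool.adaptive.session.threshold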
        | string | 1.7.0 | +| kyuubi.engine.pool.size | -1 | The size of the engine pool. Note that, if the size is less than 1, the engine pool will not be enabled; otherwise, the size of the engine pool will be min(this, kyuubi.engine.pool.size.threshold). | int | 1.4.0 | +| kyuubi.engine.pool.size.threshold | 9 | This parameter is introduced as a server-side parameter controlling the upper limit of the engine pool. | int | 1.4.0 | +| kyuubi.engine.principal | <undefined> | Kerberos principal for the kyuubi engine. | string | 1.10.0 | +| kyuubi.engine.report.interval | PT1M | The check interval for engine report to the server | duration | 1.10.0 | +| kyuubi.engine.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.3.0 | +| kyuubi.engine.share.level | USER | Engines will be shared in different levels, available configs are:
        • CONNECTION: the engine will not be shared but only used by the current client connection, and the engine will be launched by session user.
        • USER: the engine will be shared by all sessions created by a unique username, and the engine will be launched by session user.
        • GROUP: the engine will be shared by all sessions created by all users belonging to the same primary group. The engine will be launched with the primary group name as the effective username, so here the group name acts as a special user who is able to access the computing resources/data of the team. It follows the [Hadoop GroupsMapping](https://reurl.cc/xE61Y5) to map a user to a primary group. If the primary group is not found, it falls back to the USER level.
        • SERVER: the engine will be shared by Kyuubi servers, and the engine will be launched by Server's user.
        See also `kyuubi.engine.share.level.subdomain` and `kyuubi.engine.doAs.enabled`. | string | 1.2.0 | +| kyuubi.engine.share.level.sub.domain | <undefined> | (deprecated) - Using kyuubi.engine.share.level.subdomain instead | string | 1.2.0 | +| kyuubi.engine.share.level.subdomain | <undefined> | Allow end-users to create a subdomain for the share level of an engine. A subdomain is a case-insensitive string values that must be a valid zookeeper subpath. For example, for the `USER` share level, an end-user can share a certain engine within a subdomain, not for all of its clients. End-users are free to create multiple engines in the `USER` share level. When disable engine pool, use 'default' if absent. | string | 1.4.0 | +| kyuubi.engine.single.spark.session | false | When set to true, this engine is running in a single session mode. All the JDBC/ODBC connections share the temporary views, function registries, SQL configuration and the current database. | boolean | 1.3.0 | +| kyuubi.engine.spark.event.loggers | SPARK | A comma-separated list of engine loggers, where engine/session/operation etc events go.
        • SPARK: the events will be written to the Spark listener bus.
        • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
        • JDBC: to be done
        • CUSTOM: to be done.
        | seq | 1.7.0 | +| kyuubi.engine.spark.initialize.sql | SHOW DATABASES | The initialize sql for Spark engine. It fallback to `kyuubi.engine.initialize.sql`. | seq | 1.8.1 | +| kyuubi.engine.spark.operation.incremental.collect | false | When true, the result will be sequentially calculated and returned to the Spark driver. Note that, kyuubi.operation.result.max.rows will be ignored on incremental collect mode. It fallback to `kyuubi.operation.incremental.collect` | boolean | 1.10.0 | +| kyuubi.engine.spark.output.mode | AUTO | The output mode of Spark engine:
        • AUTO: For PySpark, use the `text/plain` content extracted from the Python response as output.
        • NOTEBOOK: For PySpark, use the original Python response as output.
        | string | 1.9.0 | +| kyuubi.engine.spark.python.env.archive | <undefined> | Portable Python env archive used for Spark engine Python language mode. | string | 1.7.0 | +| kyuubi.engine.spark.python.env.archive.exec.path | bin/python | The Python exec path under the Python env archive. | string | 1.7.0 | +| kyuubi.engine.spark.python.home.archive | <undefined> | Spark archive containing $SPARK_HOME/python directory, which is used to init session Python worker for Python language mode. | string | 1.7.0 | +| kyuubi.engine.submit.timeout | PT30S | Period to tolerant Driver Pod ephemerally invisible after submitting. In some Resource Managers, e.g. K8s, the Driver Pod is not visible immediately after `spark-submit` is returned. | duration | 1.7.1 | +| kyuubi.engine.trino.connection.keystore.password | <undefined> | The keystore password used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.keystore.path | <undefined> | The keystore path used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.keystore.type | <undefined> | The keystore type used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.password | <undefined> | The password used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.truststore.password | <undefined> | The truststore password used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.truststore.path | <undefined> | The truststore path used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.truststore.type | <undefined> | The truststore type used for connecting to trino cluster | string | 1.8.0 | +| kyuubi.engine.trino.connection.user | <undefined> | The user used for connecting to trino cluster | string | 1.9.0 | +| kyuubi.engine.trino.event.loggers | JSON | A comma-separated list of engine history loggers, where engine/session/operation etc 
events go.
        • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
        • JDBC: to be done
        • CUSTOM: to be done.
        | seq | 1.7.0 | +| kyuubi.engine.trino.extra.classpath | <undefined> | The extra classpath for the Trino query engine, for configuring other libs which may need by the Trino engine | string | 1.6.0 | +| kyuubi.engine.trino.java.options | <undefined> | The extra Java options for the Trino query engine | string | 1.6.0 | +| kyuubi.engine.trino.memory | 1g | The heap memory for the Trino query engine | string | 1.6.0 | +| kyuubi.engine.trino.operation.incremental.collect | false | When true, the result will be sequentially calculated and returned to the trino. It fallback to `kyuubi.operation.incremental.collect` | boolean | 1.10.0 | +| kyuubi.engine.type | SPARK_SQL | Specify the detailed engine supported by Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are:
        • SPARK_SQL: specifying this engine type will launch a Spark engine which can provide all the capabilities of Apache Spark. Note that this is the default engine type.
        • FLINK_SQL: specifying this engine type will launch a Flink engine which can provide all the capabilities of Apache Flink.
        • TRINO: specifying this engine type will launch a Trino engine which can provide all the capabilities of Trino.
        • HIVE_SQL: specifying this engine type will launch a Hive engine which can provide all the capabilities of Hive Server2.
        • JDBC: specifying this engine type will launch a JDBC engine which can forward queries to the database system through the corresponding JDBC driver. For now, it supports Doris, MySQL, Phoenix, PostgreSQL and StarRocks.
        • CHAT: specifying this engine type will launch a Chat engine.
        | string | 1.4.0 | +| kyuubi.engine.ui.retainedSessions | 200 | The number of SQL client sessions kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | +| kyuubi.engine.ui.retainedStatements | 200 | The number of statements kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | +| kyuubi.engine.ui.stop.enabled | true | When true, allows Kyuubi engine to be killed from the Spark Web UI. | boolean | 1.3.0 | +| kyuubi.engine.user.isolated.spark.session | true | When set to false, if the engine is running in a group or server share level, all the JDBC/ODBC connections will be isolated against the user. Including the temporary views, function registries, SQL configuration, and the current database. Note that, it does not affect if the share level is connection or user. | boolean | 1.6.0 | +| kyuubi.engine.user.isolated.spark.session.idle.interval | PT1M | The interval to check if the user-isolated Spark session is timeout. | duration | 1.6.0 | +| kyuubi.engine.user.isolated.spark.session.idle.timeout | PT6H | If kyuubi.engine.user.isolated.spark.session is false, we will release the Spark session if its corresponding user is inactive after this configured timeout. | duration | 1.6.0 | +| kyuubi.engine.yarn.app.name | <undefined> | The YARN app name when the engine deploy mode is YARN. | string | 1.9.0 | +| kyuubi.engine.yarn.cores | 1 | kyuubi engine container core number when the engine deploy mode is YARN. | int | 1.9.0 | +| kyuubi.engine.yarn.java.options | <undefined> | The extra Java options for the AM when the engine deploy mode is YARN. | string | 1.9.0 | +| kyuubi.engine.yarn.memory | 1024 | kyuubi engine container memory in mb when the engine deploy mode is YARN. | int | 1.9.0 | +| kyuubi.engine.yarn.priority | <undefined> | kyuubi engine yarn priority when the engine deploy mode is YARN. | int | 1.9.0 | +| kyuubi.engine.yarn.queue | default | kyuubi engine yarn queue when the engine deploy mode is YARN. 
| string | 1.9.0 | +| kyuubi.engine.yarn.stagingDir | <undefined> | Staging directory used while submitting kyuubi engine to YARN, It should be a absolute path in HDFS. | string | 1.9.0 | +| kyuubi.engine.yarn.submit.timeout | PT30S | The engine submit timeout for YARN application. | duration | 1.7.2 | +| kyuubi.engine.yarn.tags | <undefined> | kyuubi engine yarn tags when the engine deploy mode is YARN. | seq | 1.9.0 | ### Event @@ -217,66 +234,69 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Frontend -| Key | Default | Meaning | Type | Since | -|--------------------------------------------------------|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.frontend.advertised.host | <undefined> | Hostname or IP of the Kyuubi server's frontend services to publish to external systems such as the service discovery ensemble and metadata store. Use it when you want to advertise a different hostname or IP than the bind host. | string | 1.8.0 | -| kyuubi.frontend.backoff.slot.length | PT0.1S | (deprecated) Time to back off during login to the thrift frontend service. | duration | 1.0.0 | -| kyuubi.frontend.bind.host | <undefined> | Hostname or IP of the machine on which to run the frontend services. | string | 1.0.0 | -| kyuubi.frontend.bind.port | 10009 | (deprecated) Port of the machine on which to run the thrift frontend service via the binary protocol. 
| int | 1.0.0 | -| kyuubi.frontend.connection.url.use.hostname | true | When true, frontend services prefer hostname, otherwise, ip address. Note that, the default value is set to `false` when engine running on Kubernetes to prevent potential network issues. | boolean | 1.5.0 | -| kyuubi.frontend.login.timeout | PT20S | (deprecated) Timeout for Thrift clients during login to the thrift frontend service. | duration | 1.0.0 | -| kyuubi.frontend.max.message.size | 104857600 | (deprecated) Maximum message size in bytes a Kyuubi server will accept. | int | 1.0.0 | -| kyuubi.frontend.max.worker.threads | 999 | (deprecated) Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | -| kyuubi.frontend.min.worker.threads | 9 | (deprecated) Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | -| kyuubi.frontend.mysql.bind.host | <undefined> | Hostname or IP of the machine on which to run the MySQL frontend service. | string | 1.4.0 | -| kyuubi.frontend.mysql.bind.port | 3309 | Port of the machine on which to run the MySQL frontend service. | int | 1.4.0 | -| kyuubi.frontend.mysql.max.worker.threads | 999 | Maximum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | -| kyuubi.frontend.mysql.min.worker.threads | 9 | Minimum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | -| kyuubi.frontend.mysql.netty.worker.threads | <undefined> | Number of thread in the netty worker event loop of MySQL frontend service. Use min(cpu_cores, 8) in default. 
| int | 1.4.0 | -| kyuubi.frontend.mysql.worker.keepalive.time | PT1M | Time(ms) that an idle async thread of the command execution thread pool will wait for a new task to arrive before terminating in MySQL frontend service | duration | 1.4.0 | -| kyuubi.frontend.protocols | THRIFT_BINARY,REST | A comma-separated list for all frontend protocols
        • THRIFT_BINARY - HiveServer2 compatible thrift binary protocol.
        • THRIFT_HTTP - HiveServer2 compatible thrift http protocol.
        • REST - Kyuubi defined REST API(experimental).
        • MYSQL - MySQL compatible text protocol(experimental).
        • TRINO - Trino compatible http protocol(experimental).
        | seq | 1.4.0 | -| kyuubi.frontend.proxy.http.client.ip.header | X-Real-IP | The HTTP header to record the real client IP address. If your server is behind a load balancer or other proxy, the server will see this load balancer or proxy IP address as the client IP address, to get around this common issue, most load balancers or proxies offer the ability to record the real remote IP address in an HTTP header that will be added to the request for other devices to use. Note that, because the header value can be specified to any IP address, so it will not be used for authentication. | string | 1.6.0 | -| kyuubi.frontend.rest.bind.host | <undefined> | Hostname or IP of the machine on which to run the REST frontend service. | string | 1.4.0 | -| kyuubi.frontend.rest.bind.port | 10099 | Port of the machine on which to run the REST frontend service. | int | 1.4.0 | -| kyuubi.frontend.rest.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the rest frontend service | int | 1.6.2 | -| kyuubi.frontend.ssl.keystore.algorithm | <undefined> | SSL certificate keystore algorithm. | string | 1.7.0 | -| kyuubi.frontend.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.7.0 | -| kyuubi.frontend.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.7.0 | -| kyuubi.frontend.ssl.keystore.type | <undefined> | SSL certificate keystore type. | string | 1.7.0 | -| kyuubi.frontend.thrift.backoff.slot.length | PT0.1S | Time to back off during login to the thrift frontend service. | duration | 1.4.0 | -| kyuubi.frontend.thrift.binary.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via the binary protocol. | string | 1.4.0 | -| kyuubi.frontend.thrift.binary.bind.port | 10009 | Port of the machine on which to run the thrift frontend service via the binary protocol. 
| int | 1.4.0 | -| kyuubi.frontend.thrift.binary.ssl.disallowed.protocols | SSLv2,SSLv3 | SSL versions to disallow for Kyuubi thrift binary frontend. | set | 1.7.0 | -| kyuubi.frontend.thrift.binary.ssl.enabled | false | Set this to true for using SSL encryption in thrift binary frontend server. | boolean | 1.7.0 | -| kyuubi.frontend.thrift.binary.ssl.include.ciphersuites || A comma-separated list of include SSL cipher suite names for thrift binary frontend. | seq | 1.7.0 | -| kyuubi.frontend.thrift.http.allow.user.substitution | true | Allow alternate user to be specified as part of open connection request when using HTTP transport mode. | boolean | 1.6.0 | -| kyuubi.frontend.thrift.http.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via http protocol. | string | 1.6.0 | -| kyuubi.frontend.thrift.http.bind.port | 10010 | Port of the machine on which to run the thrift frontend service via http protocol. | int | 1.6.0 | -| kyuubi.frontend.thrift.http.compression.enabled | true | Enable thrift http compression via Jetty compression support | boolean | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.auth.enabled | true | When true, Kyuubi in HTTP transport mode, will use cookie-based authentication mechanism | boolean | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.domain | <undefined> | Domain for the Kyuubi generated cookies | string | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.is.httponly | true | HttpOnly attribute of the Kyuubi generated cookie. | boolean | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.max.age | 86400 | Maximum age in seconds for server side cookie used by Kyuubi in HTTP mode. | int | 1.6.0 | -| kyuubi.frontend.thrift.http.cookie.path | <undefined> | Path for the Kyuubi generated cookies | string | 1.6.0 | -| kyuubi.frontend.thrift.http.max.idle.time | PT30M | Maximum idle time for a connection on the server when in HTTP mode. 
| duration | 1.6.0 | -| kyuubi.frontend.thrift.http.path | cliservice | Path component of URL endpoint when in HTTP mode. | string | 1.6.0 | -| kyuubi.frontend.thrift.http.request.header.size | 6144 | Request header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | -| kyuubi.frontend.thrift.http.response.header.size | 6144 | Response header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | -| kyuubi.frontend.thrift.http.ssl.exclude.ciphersuites || A comma-separated list of exclude SSL cipher suite names for thrift http frontend. | seq | 1.7.0 | -| kyuubi.frontend.thrift.http.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.6.0 | -| kyuubi.frontend.thrift.http.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.6.0 | -| kyuubi.frontend.thrift.http.ssl.protocol.blacklist | SSLv2,SSLv3 | SSL Versions to disable when using HTTP transport mode. | seq | 1.6.0 | -| kyuubi.frontend.thrift.http.use.SSL | false | Set this to true for using SSL encryption in http mode. | boolean | 1.6.0 | -| kyuubi.frontend.thrift.http.xsrf.filter.enabled | false | If enabled, Kyuubi will block any requests made to it over HTTP if an X-XSRF-HEADER header is not present | boolean | 1.6.0 | -| kyuubi.frontend.thrift.login.timeout | PT20S | Timeout for Thrift clients during login to the thrift frontend service. | duration | 1.4.0 | -| kyuubi.frontend.thrift.max.message.size | 104857600 | Maximum message size in bytes a Kyuubi server will accept. 
| int | 1.4.0 | -| kyuubi.frontend.thrift.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | -| kyuubi.frontend.thrift.min.worker.threads | 9 | Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | -| kyuubi.frontend.thrift.worker.keepalive.time | PT1M | Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.4.0 | -| kyuubi.frontend.trino.bind.host | <undefined> | Hostname or IP of the machine on which to run the TRINO frontend service. | string | 1.7.0 | -| kyuubi.frontend.trino.bind.port | 10999 | Port of the machine on which to run the TRINO frontend service. | int | 1.7.0 | -| kyuubi.frontend.trino.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the Trino frontend service | int | 1.7.0 | -| kyuubi.frontend.worker.keepalive.time | PT1M | (deprecated) Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.0.0 | +| Key | Default | Meaning | Type | Since | +|------------------------------------------------------------|--------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------| +| kyuubi.frontend.advertised.host | <undefined> | Hostname or IP of the Kyuubi server's frontend services to publish to external systems such as the service discovery ensemble and metadata store. Use it when you want to advertise a different hostname or IP than the bind host. 
| string | 1.8.0 | +| kyuubi.frontend.bind.host | <undefined> | Hostname or IP of the machine on which to run the frontend services. | string | 1.0.0 | +| kyuubi.frontend.bind.port | 10009 | (deprecated) Port of the machine on which to run the thrift frontend service via the binary protocol. | int | 1.0.0 | +| kyuubi.frontend.connection.url.use.hostname | true | When true, frontend services prefer hostname, otherwise, ip address. Note that, the default value is set to `false` when engine running on Kubernetes to prevent potential network issues. | boolean | 1.5.0 | +| kyuubi.frontend.max.message.size | 104857600 | (deprecated) Maximum message size in bytes a Kyuubi server will accept. | int | 1.0.0 | +| kyuubi.frontend.max.worker.threads | 999 | (deprecated) Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | +| kyuubi.frontend.min.worker.threads | 9 | (deprecated) Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.0.0 | +| kyuubi.frontend.mysql.bind.host | <undefined> | Hostname or IP of the machine on which to run the MySQL frontend service. | string | 1.4.0 | +| kyuubi.frontend.mysql.bind.port | 3309 | Port of the machine on which to run the MySQL frontend service. | int | 1.4.0 | +| kyuubi.frontend.mysql.max.worker.threads | 999 | Maximum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | +| kyuubi.frontend.mysql.min.worker.threads | 9 | Minimum number of threads in the command execution thread pool for the MySQL frontend service | int | 1.4.0 | +| kyuubi.frontend.mysql.netty.worker.threads | <undefined> | Number of thread in the netty worker event loop of MySQL frontend service. Use min(cpu_cores, 8) in default. 
| int | 1.4.0 | +| kyuubi.frontend.mysql.worker.keepalive.time | PT1M | Time(ms) that an idle async thread of the command execution thread pool will wait for a new task to arrive before terminating in MySQL frontend service | duration | 1.4.0 | +| kyuubi.frontend.protocols | THRIFT_BINARY,REST | A comma-separated list for all frontend protocols
        • THRIFT_BINARY - HiveServer2 compatible thrift binary protocol.
        • THRIFT_HTTP - HiveServer2 compatible thrift http protocol.
        • REST - Kyuubi defined REST API(experimental).
        • MYSQL - MySQL compatible text protocol(experimental).
        • TRINO - Trino compatible http protocol(experimental).
        | seq | 1.4.0 | +| kyuubi.frontend.proxy.http.client.ip.header | X-Real-IP | The HTTP header to record the real client IP address. If your server is behind a load balancer or other proxy, the server will see this load balancer or proxy IP address as the client IP address, to get around this common issue, most load balancers or proxies offer the ability to record the real remote IP address in an HTTP header that will be added to the request for other devices to use. Note that, because the header value can be specified to any IP address, so it will not be used for authentication. | string | 1.6.0 | +| kyuubi.frontend.rest.bind.host | <undefined> | Hostname or IP of the machine on which to run the REST frontend service. | string | 1.4.0 | +| kyuubi.frontend.rest.bind.port | 10099 | Port of the machine on which to run the REST frontend service. | int | 1.4.0 | +| kyuubi.frontend.rest.jetty.stopTimeout | PT5S | Stop timeout for Jetty server used by the RESTful frontend service. | duration | 1.8.1 | +| kyuubi.frontend.rest.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the rest frontend service | int | 1.6.2 | +| kyuubi.frontend.rest.proxy.jetty.client.idleTimeout | PT30S | The idle timeout in milliseconds for Jetty server used by the RESTful frontend service. | duration | 1.10.0 | +| kyuubi.frontend.rest.proxy.jetty.client.maxConnections | 32768 | The max number of connections per destination for Jetty server used by the RESTful frontend service. | int | 1.10.0 | +| kyuubi.frontend.rest.proxy.jetty.client.maxThreads | 256 | The max number of threads of HttpClient's Executor for Jetty server used by the RESTful frontend service. | int | 1.10.0 | +| kyuubi.frontend.rest.proxy.jetty.client.requestBufferSize | 4096 | Size of the buffer in bytes used to write requests for Jetty server used by the RESTful frontend service. 
| int | 1.10.0 | +| kyuubi.frontend.rest.proxy.jetty.client.responseBufferSize | 4096 | Size of the buffer in bytes used to read response for Jetty server used by the RESTful frontend service. | int | 1.10.0 | +| kyuubi.frontend.rest.proxy.jetty.client.timeout | PT60S | The total timeout in milliseconds for Jetty server used by the RESTful frontend service. | duration | 1.10.0 | +| kyuubi.frontend.ssl.keystore.algorithm | <undefined> | SSL certificate keystore algorithm. | string | 1.7.0 | +| kyuubi.frontend.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.7.0 | +| kyuubi.frontend.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.7.0 | +| kyuubi.frontend.ssl.keystore.type | <undefined> | SSL certificate keystore type. | string | 1.7.0 | +| kyuubi.frontend.thrift.binary.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via the binary protocol. | string | 1.4.0 | +| kyuubi.frontend.thrift.binary.bind.port | 10009 | Port of the machine on which to run the thrift frontend service via the binary protocol. | int | 1.4.0 | +| kyuubi.frontend.thrift.binary.ssl.disallowed.protocols | SSLv2,SSLv3 | SSL versions to disallow for Kyuubi thrift binary frontend. | set | 1.7.0 | +| kyuubi.frontend.thrift.binary.ssl.enabled | false | Set this to true for using SSL encryption in thrift binary frontend server. | boolean | 1.7.0 | +| kyuubi.frontend.thrift.binary.ssl.include.ciphersuites || A comma-separated list of include SSL cipher suite names for thrift binary frontend. | seq | 1.7.0 | +| kyuubi.frontend.thrift.http.bind.host | <undefined> | Hostname or IP of the machine on which to run the thrift frontend service via http protocol. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.bind.port | 10010 | Port of the machine on which to run the thrift frontend service via http protocol. 
| int | 1.6.0 | +| kyuubi.frontend.thrift.http.compression.enabled | true | Enable thrift http compression via Jetty compression support | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.auth.enabled | true | When true, Kyuubi in HTTP transport mode, will use cookie-based authentication mechanism | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.domain | <undefined> | Domain for the Kyuubi generated cookies | string | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.is.httponly | true | HttpOnly attribute of the Kyuubi generated cookie. | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.max.age | 86400 | Maximum age in seconds for server side cookie used by Kyuubi in HTTP mode. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.cookie.path | <undefined> | Path for the Kyuubi generated cookies | string | 1.6.0 | +| kyuubi.frontend.thrift.http.max.idle.time | PT30M | Maximum idle time for a connection on the server when in HTTP mode. | duration | 1.6.0 | +| kyuubi.frontend.thrift.http.path | cliservice | Path component of URL endpoint when in HTTP mode. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.request.header.size | 6144 | Request header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.response.header.size | 6144 | Response header size in bytes, when using HTTP transport mode. Jetty defaults used. | int | 1.6.0 | +| kyuubi.frontend.thrift.http.ssl.exclude.ciphersuites || A comma-separated list of exclude SSL cipher suite names for thrift http frontend. | seq | 1.7.0 | +| kyuubi.frontend.thrift.http.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.ssl.keystore.path | <undefined> | SSL certificate keystore location. | string | 1.6.0 | +| kyuubi.frontend.thrift.http.ssl.protocol.blacklist | SSLv2,SSLv3 | SSL Versions to disable when using HTTP transport mode. 
| seq | 1.6.0 | +| kyuubi.frontend.thrift.http.use.SSL | false | Set this to true for using SSL encryption in http mode. | boolean | 1.6.0 | +| kyuubi.frontend.thrift.http.xsrf.filter.enabled | false | If enabled, Kyuubi will block any requests made to it over HTTP if an X-XSRF-HEADER header is not present | boolean | 1.6.0 | +| kyuubi.frontend.thrift.max.message.size | 104857600 | Maximum message size in bytes a Kyuubi server will accept. | int | 1.4.0 | +| kyuubi.frontend.thrift.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | +| kyuubi.frontend.thrift.min.worker.threads | 9 | Minimum number of threads in the frontend worker thread pool for the thrift frontend service | int | 1.4.0 | +| kyuubi.frontend.thrift.worker.keepalive.time | PT1M | Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.4.0 | +| kyuubi.frontend.trino.bind.host | <undefined> | Hostname or IP of the machine on which to run the TRINO frontend service. | string | 1.7.0 | +| kyuubi.frontend.trino.bind.port | 10999 | Port of the machine on which to run the TRINO frontend service. | int | 1.7.0 | +| kyuubi.frontend.trino.jetty.stopTimeout | PT5S | Stop timeout for Jetty server used by the Trino frontend service. | duration | 1.8.1 | +| kyuubi.frontend.trino.max.worker.threads | 999 | Maximum number of threads in the frontend worker thread pool for the Trino frontend service | int | 1.7.0 | +| kyuubi.frontend.worker.keepalive.time | PT1M | (deprecated) Keep-alive time (in milliseconds) for an idle worker thread | duration | 1.0.0 | ### Ha @@ -290,7 +310,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.ha.etcd.ssl.client.key.path | <undefined> | Where the etcd SSL key file is stored. | string | 1.6.0 | | kyuubi.ha.etcd.ssl.enabled | false | When set to true, will build an SSL secured etcd client. 
| boolean | 1.6.0 | | kyuubi.ha.namespace | kyuubi | The root directory for the service to deploy its instance uri | string | 1.6.0 | -| kyuubi.ha.zookeeper.acl.enabled | false | Set to true if the ZooKeeper ensemble is kerberized | boolean | 1.0.0 | +| kyuubi.ha.zookeeper.acl.enabled | false | (deprecated) Set to true if the ZooKeeper ensemble is kerberized | boolean | 1.0.0 | | kyuubi.ha.zookeeper.auth.digest | <undefined> | The digest auth string is used for ZooKeeper authentication, like: username:password. | string | 1.3.2 | | kyuubi.ha.zookeeper.auth.keytab | <undefined> | Location of the Kyuubi server's keytab that is used for ZooKeeper authentication. | string | 1.3.2 | | kyuubi.ha.zookeeper.auth.principal | <undefined> | Kerberos principal name that is used for ZooKeeper authentication. | string | 1.3.2 | @@ -348,24 +368,24 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Metadata -| Key | Default | Meaning | Type | Since | -|-------------------------------------------------|----------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.metadata.cleaner.enabled | true | Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in the terminate state with max age limitation. 
| boolean | 1.6.0 | -| kyuubi.metadata.cleaner.interval | PT30M | The interval to check and clean expired metadata. | duration | 1.6.0 | -| kyuubi.metadata.max.age | PT72H | The maximum age of metadata, the metadata exceeding the age will be cleaned. | duration | 1.6.0 | -| kyuubi.metadata.recovery.threads | 10 | The number of threads for recovery from the metadata store when the Kyuubi server restarts. | int | 1.6.0 | -| kyuubi.metadata.request.async.retry.enabled | true | Whether to retry in async when metadata request failed. When true, return success response immediately even the metadata request failed, and schedule it in background until success, to tolerate long-time metadata store outages w/o blocking the submission request. | boolean | 1.7.0 | -| kyuubi.metadata.request.async.retry.queue.size | 65536 | The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | -| kyuubi.metadata.request.async.retry.threads | 10 | Number of threads in the metadata request async retry manager thread pool. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | -| kyuubi.metadata.request.retry.interval | PT5S | The interval to check and trigger the metadata request retry tasks. | duration | 1.6.0 | -| kyuubi.metadata.store.class | org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore | Fully qualified class name for server metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.database.schema.init | true | Whether to init the JDBC metadata store database schema. | boolean | 1.6.0 | -| kyuubi.metadata.store.jdbc.database.type | SQLITE | The database type for server jdbc metadata store.
        • (Deprecated) DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
        • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
        • MYSQL: MySQL, JDBC driver `com.mysql.cj.jdbc.Driver` (fallback `com.mysql.jdbc.Driver`).
        • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
        • Note that: The JDBC datasource is powered by HiKariCP, for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | -| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. 
| string | 1.6.0 | +| Key | Default | Meaning | Type | Since | +|-------------------------------------------------|----------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.metadata.cleaner.enabled | true | Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in the terminate state with max age limitation. | boolean | 1.6.0 | +| kyuubi.metadata.cleaner.interval | PT30M | The interval to check and clean expired metadata. | duration | 1.6.0 | +| kyuubi.metadata.max.age | PT72H | The maximum age of metadata, the metadata exceeding the age will be cleaned. | duration | 1.6.0 | +| kyuubi.metadata.recovery.threads | 10 | The number of threads for recovery from the metadata store when the Kyuubi server restarts. | int | 1.6.0 | +| kyuubi.metadata.request.async.retry.enabled | true | Whether to retry in async when metadata request failed. When true, return success response immediately even the metadata request failed, and schedule it in background until success, to tolerate long-time metadata store outages w/o blocking the submission request. | boolean | 1.7.0 | +| kyuubi.metadata.request.async.retry.queue.size | 65536 | The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds. 
Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | +| kyuubi.metadata.request.async.retry.threads | 10 | Number of threads in the metadata request async retry manager thread pool. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | +| kyuubi.metadata.request.retry.interval | PT5S | The interval to check and trigger the metadata request retry tasks. | duration | 1.6.0 | +| kyuubi.metadata.store.class | org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore | Fully qualified class name for server metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.schema.init | true | Whether to init the JDBC metadata store database schema. | boolean | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.type | SQLITE | The database type for server jdbc metadata store.
          • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
          • MYSQL: MySQL, JDBC driver `com.mysql.cj.jdbc.Driver` (fallback `com.mysql.jdbc.Driver`).
          • POSTGRESQL: PostgreSQL, JDBC driver `org.postgresql.Driver`.
          • CUSTOM: User-defined database type; the corresponding JDBC driver must be specified.
          • Note that: The JDBC datasource is powered by HiKariCP, for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | +| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:<KYUUBI_HOME>/kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across Kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. Note: this value support the variables substitution: ``. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. 
| string | 1.6.0 | ### Metrics @@ -384,7 +404,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | Key | Default | Meaning | Type | Since | |--------------------------------------------------|---------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.operation.getTables.ignoreTableProperties | false | Speed up the `GetTables` operation by returning table identities only. | boolean | 1.8.0 | +| kyuubi.operation.getTables.ignoreTableProperties | false | Speed up the `GetTables` operation by ignoring `tableTypes` query criteria, and returning table identities only. | boolean | 1.8.0 | | kyuubi.operation.idle.timeout | PT3H | Operation will be closed when it's not accessed for this duration of time | duration | 1.0.0 | | kyuubi.operation.interrupt.on.cancel | true | When true, all running tasks will be interrupted if one cancels a query. When false, all running tasks will remain until finished. | boolean | 1.2.0 | | kyuubi.operation.language | SQL | Choose a programing language for the following inputs
            • SQL: (Default) Run all following statements as SQL queries.
            • SCALA: Run all following input as scala codes
            • PYTHON: (Experimental) Run all following input as Python codes with Spark engine
            | string | 1.5.0 | @@ -425,56 +445,58 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Session -| Key | Default | Meaning | Type | Since | -|------------------------------------------------------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.session.check.interval | PT5M | The check interval for session timeout. | duration | 1.0.0 | -| kyuubi.session.close.on.disconnect | true | Session will be closed when client disconnects from kyuubi gateway. Set this to false to have session outlive its parent connection. | boolean | 1.8.0 | -| kyuubi.session.conf.advisor | <undefined> | A config advisor plugin for Kyuubi Server. This plugin can provide a list of custom configs for different users or session configs and overwrite the session configs before opening a new session. This config value should be a subclass of `org.apache.kyuubi.plugin.SessionConfAdvisor` which has a zero-arg constructor. | seq | 1.5.0 | -| kyuubi.session.conf.file.reload.interval | PT10M | When `FileSessionConfAdvisor` is used, this configuration defines the expired time of `$KYUUBI_CONF_DIR/kyuubi-session-.conf` in the cache. After exceeding this value, the file will be reloaded. | duration | 1.7.0 | -| kyuubi.session.conf.ignore.list || A comma-separated list of ignored keys. If the client connection contains any of them, the key and the corresponding value will be removed silently during engine bootstrap and connection setup. 
Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | set | 1.2.0 | -| kyuubi.session.conf.profile | <undefined> | Specify a profile to load session-level configurations from `$KYUUBI_CONF_DIR/kyuubi-session-.conf`. This configuration will be ignored if the file does not exist. This configuration only takes effect when `kyuubi.session.conf.advisor` is set as `org.apache.kyuubi.session.FileSessionConfAdvisor`. | string | 1.7.0 | -| kyuubi.session.conf.restrict.list || A comma-separated list of restricted keys. If the client connection contains any of them, the connection will be rejected explicitly during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | set | 1.2.0 | -| kyuubi.session.engine.alive.max.failures | 3 | The maximum number of failures allowed for the engine. | int | 1.8.0 | -| kyuubi.session.engine.alive.probe.enabled | false | Whether to enable the engine alive probe, it true, we will create a companion thrift client that keeps sending simple requests to check whether the engine is alive. | boolean | 1.6.0 | -| kyuubi.session.engine.alive.probe.interval | PT10S | The interval for engine alive probe. | duration | 1.6.0 | -| kyuubi.session.engine.alive.timeout | PT2M | The timeout for engine alive. If there is no alive probe success in the last timeout window, the engine will be marked as no-alive. | duration | 1.6.0 | -| kyuubi.session.engine.check.interval | PT1M | The check interval for engine timeout | duration | 1.0.0 | -| kyuubi.session.engine.flink.fetch.timeout | <undefined> | Result fetch timeout for Flink engine. If the timeout is reached, the result fetch would be stopped and the current fetched would be returned. 
If no data are fetched, a TimeoutException would be thrown. | duration | 1.8.0 | -| kyuubi.session.engine.flink.initialize.sql || The initialize sql for Flink session. It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | -| kyuubi.session.engine.flink.main.resource | <undefined> | The package used to create Flink SQL engine remote job. If it is undefined, Kyuubi will use the default | string | 1.4.0 | -| kyuubi.session.engine.flink.max.rows | 1000000 | Max rows of Flink query results. For batch queries, rows exceeding the limit would be ignored. For streaming queries, the query would be canceled if the limit is reached. | int | 1.5.0 | -| kyuubi.session.engine.hive.main.resource | <undefined> | The package used to create Hive engine remote job. If it is undefined, Kyuubi will use the default | string | 1.6.0 | -| kyuubi.session.engine.idle.timeout | PT30M | engine timeout, the engine will self-terminate when it's not accessed for this duration. 0 or negative means not to self-terminate. | duration | 1.0.0 | -| kyuubi.session.engine.initialize.timeout | PT3M | Timeout for starting the background engine, e.g. SparkSQLEngine. | duration | 1.0.0 | -| kyuubi.session.engine.launch.async | true | When opening kyuubi session, whether to launch the backend engine asynchronously. When true, the Kyuubi server will set up the connection with the client without delay as the backend engine will be created asynchronously. | boolean | 1.4.0 | -| kyuubi.session.engine.log.timeout | PT24H | If we use Spark as the engine then the session submit log is the console output of spark-submit. We will retain the session submit log until over the config value. | duration | 1.1.0 | -| kyuubi.session.engine.login.timeout | PT15S | The timeout of creating the connection to remote sql query engine | duration | 1.0.0 | -| kyuubi.session.engine.open.max.attempts | 9 | The number of times an open engine will retry when encountering a special error. 
| int | 1.7.0 | -| kyuubi.session.engine.open.retry.wait | PT10S | How long to wait before retrying to open the engine after failure. | duration | 1.7.0 | -| kyuubi.session.engine.share.level | USER | (deprecated) - Using kyuubi.engine.share.level instead | string | 1.0.0 | -| kyuubi.session.engine.spark.initialize.sql || The initialize sql for Spark session. It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | -| kyuubi.session.engine.spark.main.resource | <undefined> | The package used to create Spark SQL engine remote application. If it is undefined, Kyuubi will use the default | string | 1.0.0 | -| kyuubi.session.engine.spark.max.initial.wait | PT1M | Max wait time for the initial connection to Spark engine. The engine will self-terminate no new incoming connection is established within this time. This setting only applies at the CONNECTION share level. 0 or negative means not to self-terminate. | duration | 1.8.0 | -| kyuubi.session.engine.spark.max.lifetime | PT0S | Max lifetime for Spark engine, the engine will self-terminate when it reaches the end of life. 0 or negative means not to self-terminate. | duration | 1.6.0 | -| kyuubi.session.engine.spark.progress.timeFormat | yyyy-MM-dd HH:mm:ss.SSS | The time format of the progress bar | string | 1.6.0 | -| kyuubi.session.engine.spark.progress.update.interval | PT1S | Update period of progress bar. | duration | 1.6.0 | -| kyuubi.session.engine.spark.showProgress | false | When true, show the progress bar in the Spark's engine log. | boolean | 1.6.0 | -| kyuubi.session.engine.startup.destroy.timeout | PT5S | Engine startup process destroy wait time, if the process does not stop after this time, force destroy instead. This configuration only takes effect when `kyuubi.session.engine.startup.waitCompletion=false`. 
| duration | 1.8.0 | -| kyuubi.session.engine.startup.error.max.size | 8192 | During engine bootstrapping, if anderror occurs, using this config to limit the length of error message(characters). | int | 1.1.0 | -| kyuubi.session.engine.startup.maxLogLines | 10 | The maximum number of engine log lines when errors occur during the engine startup phase. Note that this config effects on client-side to help track engine startup issues. | int | 1.4.0 | -| kyuubi.session.engine.startup.waitCompletion | true | Whether to wait for completion after the engine starts. If false, the startup process will be destroyed after the engine is started. Note that only use it when the driver is not running locally, such as in yarn-cluster mode; Otherwise, the engine will be killed. | boolean | 1.5.0 | -| kyuubi.session.engine.trino.connection.catalog | <undefined> | The default catalog that Trino engine will connect to | string | 1.5.0 | -| kyuubi.session.engine.trino.connection.url | <undefined> | The server url that Trino engine will connect to | string | 1.5.0 | -| kyuubi.session.engine.trino.main.resource | <undefined> | The package used to create Trino engine remote job. If it is undefined, Kyuubi will use the default | string | 1.5.0 | -| kyuubi.session.engine.trino.showProgress | true | When true, show the progress bar and final info in the Trino engine log. | boolean | 1.6.0 | -| kyuubi.session.engine.trino.showProgress.debug | false | When true, show the progress debug info in the Trino engine log. | boolean | 1.6.0 | -| kyuubi.session.group.provider | hadoop | A group provider plugin for Kyuubi Server. This plugin can provide primary group and groups information for different users or session configs. This config value should be a subclass of `org.apache.kyuubi.plugin.GroupProvider` which has a zero-arg constructor. Kyuubi provides the following built-in implementations:
          • hadoop: delegate the user group mapping to hadoop UserGroupInformation.
          • | string | 1.7.0 | -| kyuubi.session.idle.timeout | PT6H | session idle timeout, it will be closed when it's not accessed for this duration | duration | 1.2.0 | -| kyuubi.session.local.dir.allow.list || The local dir list that are allowed to access by the kyuubi session application. End-users might set some parameters such as `spark.files` and it will upload some local files when launching the kyuubi engine, if the local dir allow list is defined, kyuubi will check whether the path to upload is in the allow list. Note that, if it is empty, there is no limitation for that. And please use absolute paths. | set | 1.6.0 | -| kyuubi.session.name | <undefined> | A human readable name of the session and we use empty string by default. This name will be recorded in the event. Note that, we only apply this value from session conf. | string | 1.4.0 | -| kyuubi.session.proxy.user | <undefined> | An alternative to hive.server2.proxy.user. The current behavior is consistent with hive.server2.proxy.user and now only takes effect in RESTFul API. When both parameters are set, kyuubi.session.proxy.user takes precedence. | string | 1.9.0 | -| kyuubi.session.timeout | PT6H | (deprecated)session timeout, it will be closed when it's not accessed for this duration | duration | 1.0.0 | -| kyuubi.session.user.sign.enabled | false | Whether to verify the integrity of session user name on the engine side, e.g. Authz plugin in Spark. 
| boolean | 1.7.0 | +| Key | Default | Meaning | Type | Since | +|---------------------------------------------------------|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.session.check.interval | PT5M | The check interval for session timeout. | duration | 1.0.0 | +| kyuubi.session.close.on.disconnect | true | Session will be closed when client disconnects from kyuubi gateway. Set this to false to have session outlive its parent connection. | boolean | 1.8.0 | +| kyuubi.session.conf.advisor | <undefined> | A config advisor plugin for Kyuubi Server. This plugin can provide a list of custom configs for different users or session configs and overwrite the session configs before opening a new session. This config value should be a subclass of `org.apache.kyuubi.plugin.SessionConfAdvisor` which has a zero-arg constructor. | seq | 1.5.0 | +| kyuubi.session.conf.file.reload.interval | PT10M | When `FileSessionConfAdvisor` is used, this configuration defines the expired time of `$KYUUBI_CONF_DIR/kyuubi-session-.conf` in the cache. After exceeding this value, the file will be reloaded. | duration | 1.7.0 | +| kyuubi.session.conf.ignore.list || A comma-separated list of ignored keys. If the client connection contains any of them, the key and the corresponding value will be removed silently during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. 
| set | 1.2.0 | +| kyuubi.session.conf.profile | <undefined> | Specify a profile to load session-level configurations from `$KYUUBI_CONF_DIR/kyuubi-session-.conf`. This configuration will be ignored if the file does not exist. This configuration only takes effect when `kyuubi.session.conf.advisor` is set as `org.apache.kyuubi.session.FileSessionConfAdvisor`. | string | 1.7.0 | +| kyuubi.session.conf.restrict.list || A comma-separated list of restricted keys. If the client connection contains any of them, the connection will be rejected explicitly during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | set | 1.2.0 | +| kyuubi.session.engine.alive.max.failures | 3 | The maximum number of failures allowed for the engine. | int | 1.8.1 | +| kyuubi.session.engine.alive.probe.enabled | false | Whether to enable the engine alive probe, it true, we will create a companion thrift client that keeps sending simple requests to check whether the engine is alive. | boolean | 1.6.0 | +| kyuubi.session.engine.alive.probe.interval | PT10S | The interval for engine alive probe. | duration | 1.6.0 | +| kyuubi.session.engine.alive.timeout | PT2M | The timeout for engine alive. If there is no alive probe success in the last timeout window, the engine will be marked as no-alive. | duration | 1.6.0 | +| kyuubi.session.engine.check.interval | PT1M | The check interval for engine timeout | duration | 1.0.0 | +| kyuubi.session.engine.flink.fetch.timeout | <undefined> | Result fetch timeout for Flink engine. If the timeout is reached, the result fetch would be stopped and the current fetched would be returned. If no data are fetched, a TimeoutException would be thrown. | duration | 1.8.0 | +| kyuubi.session.engine.flink.initialize.sql || The initialize sql for Flink session. 
It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | +| kyuubi.session.engine.flink.main.resource | <undefined> | The package used to create Flink SQL engine remote job. If it is undefined, Kyuubi will use the default | string | 1.4.0 | +| kyuubi.session.engine.flink.max.rows | 1000000 | Max rows of Flink query results. For batch queries, rows exceeding the limit would be ignored. For streaming queries, the query would be canceled if the limit is reached. | int | 1.5.0 | +| kyuubi.session.engine.hive.main.resource | <undefined> | The package used to create Hive engine remote job. If it is undefined, Kyuubi will use the default | string | 1.6.0 | +| kyuubi.session.engine.idle.timeout | PT30M | engine timeout, the engine will self-terminate when it's not accessed for this duration. 0 or negative means not to self-terminate. | duration | 1.0.0 | +| kyuubi.session.engine.initialize.timeout | PT3M | Timeout for starting the background engine, e.g. SparkSQLEngine. | duration | 1.0.0 | +| kyuubi.session.engine.launch.async | true | When opening kyuubi session, whether to launch the backend engine asynchronously. When true, the Kyuubi server will set up the connection with the client without delay as the backend engine will be created asynchronously. | boolean | 1.4.0 | +| kyuubi.session.engine.log.timeout | PT24H | If we use Spark as the engine then the session submit log is the console output of spark-submit. We will retain the session submit log until over the config value. | duration | 1.1.0 | +| kyuubi.session.engine.login.timeout | PT15S | The timeout of creating the connection to remote sql query engine | duration | 1.0.0 | +| kyuubi.session.engine.open.max.attempts | 9 | The number of times an open engine will retry when encountering a special error. | int | 1.7.0 | +| kyuubi.session.engine.open.onFailure | RETRY | The behavior when opening engine failed:
            • RETRY: retry opening the engine, up to kyuubi.session.engine.open.max.attempts times.
            • DEREGISTER_IMMEDIATELY: deregister the engine immediately.
            • DEREGISTER_AFTER_RETRY: deregister the engine after retrying to open it kyuubi.session.engine.open.max.attempts times.
            | string | 1.8.1 | +| kyuubi.session.engine.open.retry.wait | PT10S | How long to wait before retrying to open the engine after failure. | duration | 1.7.0 | +| kyuubi.session.engine.share.level | USER | (deprecated) - Using kyuubi.engine.share.level instead | string | 1.0.0 | +| kyuubi.session.engine.spark.initialize.sql || The initialize sql for Spark session. It fallback to `kyuubi.engine.session.initialize.sql` | seq | 1.8.1 | +| kyuubi.session.engine.spark.main.resource | <undefined> | The package used to create Spark SQL engine remote application. If it is undefined, Kyuubi will use the default | string | 1.0.0 | +| kyuubi.session.engine.spark.max.initial.wait | PT1M | Max wait time for the initial connection to Spark engine. The engine will self-terminate no new incoming connection is established within this time. This setting only applies at the CONNECTION share level. 0 or negative means not to self-terminate. | duration | 1.8.0 | +| kyuubi.session.engine.spark.max.lifetime | PT0S | Max lifetime for Spark engine, the engine will self-terminate when it reaches the end of life. 0 or negative means not to self-terminate. | duration | 1.6.0 | +| kyuubi.session.engine.spark.max.lifetime.gracefulPeriod | PT0S | Graceful period for Spark engine to wait the connections disconnected after reaching the end of life. After the graceful period, all the connections without running operations will be forcibly disconnected. 0 or negative means always waiting the connections disconnected. | duration | 1.8.1 | +| kyuubi.session.engine.spark.progress.timeFormat | yyyy-MM-dd HH:mm:ss.SSS | The time format of the progress bar | string | 1.6.0 | +| kyuubi.session.engine.spark.progress.update.interval | PT1S | Update period of progress bar. | duration | 1.6.0 | +| kyuubi.session.engine.spark.showProgress | false | When true, show the progress bar in the Spark's engine log. 
| boolean | 1.6.0 | +| kyuubi.session.engine.startup.destroy.timeout | PT5S | Engine startup process destroy wait time, if the process does not stop after this time, force destroy instead. This configuration only takes effect when `kyuubi.session.engine.startup.waitCompletion=false`. | duration | 1.8.0 | +| kyuubi.session.engine.startup.error.max.size | 8192 | During engine bootstrapping, if anderror occurs, using this config to limit the length of error message(characters). | int | 1.1.0 | +| kyuubi.session.engine.startup.maxLogLines | 10 | The maximum number of engine log lines when errors occur during the engine startup phase. Note that this config effects on client-side to help track engine startup issues. | int | 1.4.0 | +| kyuubi.session.engine.startup.waitCompletion | true | Whether to wait for completion after the engine starts. If false, the startup process will be destroyed after the engine is started. Note that only use it when the driver is not running locally, such as in yarn-cluster mode; Otherwise, the engine will be killed. | boolean | 1.5.0 | +| kyuubi.session.engine.trino.connection.catalog | <undefined> | The default catalog that Trino engine will connect to | string | 1.5.0 | +| kyuubi.session.engine.trino.connection.url | <undefined> | The server url that Trino engine will connect to | string | 1.5.0 | +| kyuubi.session.engine.trino.main.resource | <undefined> | The package used to create Trino engine remote job. If it is undefined, Kyuubi will use the default | string | 1.5.0 | +| kyuubi.session.engine.trino.showProgress | true | When true, show the progress bar and final info in the Trino engine log. | boolean | 1.6.0 | +| kyuubi.session.engine.trino.showProgress.debug | false | When true, show the progress debug info in the Trino engine log. | boolean | 1.6.0 | +| kyuubi.session.group.provider | hadoop | A group provider plugin for Kyuubi Server. 
This plugin can provide primary group and groups information for different users or session configs. This config value should be a subclass of `org.apache.kyuubi.plugin.GroupProvider` which has a zero-arg constructor. Kyuubi provides the following built-in implementations:
          • hadoop: delegate the user group mapping to hadoop UserGroupInformation.
          • | string | 1.7.0 | +| kyuubi.session.idle.timeout | PT6H | session idle timeout, it will be closed when it's not accessed for this duration | duration | 1.2.0 | +| kyuubi.session.local.dir.allow.list || The local dir list that are allowed to access by the kyuubi session application. End-users might set some parameters such as `spark.files` and it will upload some local files when launching the kyuubi engine, if the local dir allow list is defined, kyuubi will check whether the path to upload is in the allow list. Note that, if it is empty, there is no limitation for that. And please use absolute paths. | set | 1.6.0 | +| kyuubi.session.name | <undefined> | A human readable name of the session and we use empty string by default. This name will be recorded in the event. Note that, we only apply this value from session conf. | string | 1.4.0 | +| kyuubi.session.proxy.user | <undefined> | An alternative to hive.server2.proxy.user. The current behavior is consistent with hive.server2.proxy.user and now only takes effect in RESTFul API. When both parameters are set, kyuubi.session.proxy.user takes precedence. | string | 1.9.0 | +| kyuubi.session.timeout | PT6H | (deprecated)session timeout, it will be closed when it's not accessed for this duration | duration | 1.0.0 | +| kyuubi.session.user.sign.enabled | false | Whether to verify the integrity of session user name on the engine side, e.g. Authz plugin in Spark. | boolean | 1.7.0 | ### Spnego @@ -499,11 +521,11 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.zookeeper.embedded.client.use.hostname | false | When true, embedded Zookeeper prefer to bind hostname, otherwise, ip address. | boolean | 1.7.2 | | kyuubi.zookeeper.embedded.data.dir | embedded_zookeeper | dataDir for the embedded zookeeper server where stores the in-memory database snapshots and, unless specified otherwise, the transaction log of updates to the database. 
If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.2.0 | | kyuubi.zookeeper.embedded.data.log.dir | embedded_zookeeper | dataLogDir for the embedded ZooKeeper server where writes the transaction log. If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.2.0 | -| kyuubi.zookeeper.embedded.directory | embedded_zookeeper | The temporary directory for the embedded ZooKeeper server. If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.0.0 | +| kyuubi.zookeeper.embedded.directory | embedded_zookeeper | (deprecated) The temporary directory for the embedded ZooKeeper server. If it is a relative path, it is resolved relative to KYUUBI_HOME. | string | 1.0.0 | | kyuubi.zookeeper.embedded.max.client.connections | 120 | maxClientCnxns for the embedded ZooKeeper server to limit the number of concurrent connections of a single client identified by IP address | int | 1.2.0 | | kyuubi.zookeeper.embedded.max.session.timeout | 60000 | maxSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 20 times the tickTime | int | 1.2.0 | | kyuubi.zookeeper.embedded.min.session.timeout | 6000 | minSessionTimeout in milliseconds for the embedded ZooKeeper server will allow the client to negotiate. Defaults to 2 times the tickTime | int | 1.2.0 | -| kyuubi.zookeeper.embedded.port | 2181 | The port of the embedded ZooKeeper server | int | 1.0.0 | +| kyuubi.zookeeper.embedded.port | 2181 | (deprecated) The port of the embedded ZooKeeper server | int | 1.0.0 | | kyuubi.zookeeper.embedded.tick.time | 3000 | tickTime in milliseconds for the embedded ZooKeeper server | int | 1.2.0 | ## Spark Configurations @@ -518,7 +540,11 @@ Setting them in `$KYUUBI_HOME/conf/kyuubi-defaults.conf` supplies with default v ### Via JDBC Connection URL -Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. 
For example: ```jdbc:hive2://localhost:10009/default;#spark.sql.shuffle.partitions=2;spark.executor.memory=5g``` +Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: + +``` +jdbc:hive2://localhost:10009/default;#spark.sql.shuffle.partitions=2;spark.executor.memory=5g +``` - **Runtime SQL Configuration** - For [Runtime SQL Configurations](https://spark.apache.org/docs/latest/configuration.html#runtime-sql-configuration), they will take affect every time @@ -550,7 +576,11 @@ The below options in `kyuubi-defaults.conf` will set `parallelism.default: 2` an ### Via JDBC Connection URL -Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default;#flink.parallelism.default=2;flink.taskmanager.memory.process.size=5g``` +Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: + +``` +jdbc:hive2://localhost:10009/default;#flink.parallelism.default=2;flink.taskmanager.memory.process.size=5g +``` ### Via SET Statements @@ -577,7 +607,11 @@ The below options in `kyuubi-defaults.conf` will set `query_max_stage_count: 500 ### Via JDBC Connection URL -Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. For example: ```jdbc:hive2://localhost:10009/default;#trino.query_max_stage_count=500;trino.parse_decimal_literals_as_double=true``` +Setting them in the JDBC Connection URL supplies session-specific for each SQL engine. 
For example: + +``` +jdbc:hive2://localhost:10009/default;#trino.query_max_stage_count=500;trino.parse_decimal_literals_as_double=true +``` ### Via SET Statements diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala index ffc45404f5e..9b7e67c3fb0 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala @@ -25,6 +25,7 @@ import java.util.concurrent.atomic.AtomicBoolean import scala.concurrent.duration.Duration import scala.util.control.NonFatal +import com.fasterxml.jackson.databind.ObjectMapper import com.google.common.annotations.VisibleForTesting import org.apache.hadoop.fs.Path import org.apache.spark.{ui, SparkConf} @@ -56,7 +57,7 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin private val shutdown = new AtomicBoolean(false) private val gracefulStopDeregistered = new AtomicBoolean(false) - + private val objectMapper = new ObjectMapper @volatile private var lifetimeTerminatingChecker: Option[ScheduledExecutorService] = None @volatile private var stopEngineExec: Option[ThreadPoolExecutor] = None private lazy val engineSavePath = @@ -188,18 +189,17 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin .flatMap { stage => statusTracker.getStageInfo(stage).map(_.numActiveTasks) }.sum - val engineMetrics = Map( - "openSessionCount" -> openSessionCount, - "activeTask" -> activeTask, - "poolId" -> engineSpace.split("-").last) - info(s"Spark engine has $openSessionCount open sessions and $activeTask active tasks.") + val engineMetrics = objectMapper.createObjectNode() + .put("openSessionCount", openSessionCount) + .put("activeTask", activeTask) + .put("poolID", 
engineSpace.split("-").last.toInt).toString DiscoveryClientProvider.withDiscoveryClient(conf) { client => if (client.pathNonExists(metricsSpace)) { client.create(metricsSpace, "PERSISTENT") } client.setData( s"/metrics$engineSpace", - engineMetrics.map { case (k, v) => s"$k=$v" }.mkString(";").getBytes) + engineMetrics.getBytes) } } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index 7a2b324efba..885da4f8c1c 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -2271,7 +2271,7 @@ object KyuubiConf { val ENGINE_POOL_ADAPTIVE_SESSION_THRESHOLD: ConfigEntry[Int] = buildConf("kyuubi.engine.pool.adaptive.session.threshold") .doc("The threshold of a engine open session count for adaptive engine pool select policy.") - .version("1.9.0") + .version("1.10.0") .intConf .checkValue(_ >= 1, "must be positive number") .createWithDefault(10) @@ -2279,7 +2279,7 @@ object KyuubiConf { val ENGINE_REPORT_INTERVAL: ConfigEntry[Long] = buildConf("kyuubi.engine.report.interval") .doc("The check interval for engine report to the server") - .version("1.9.0") + .version("1.10.0") .timeConf .checkValue(_ >= Duration.ofSeconds(1).toMillis, "Minimum 1 seconds") .createWithDefault(Duration.ofMinutes(1).toMillis) diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala index bde0f1ab48a..3222fafa488 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala @@ -23,6 +23,8 @@ import scala.collection.JavaConverters._ import scala.util.Random import com.codahale.metrics.MetricRegistry +import com.fasterxml.jackson.databind.ObjectMapper +import 
com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.common.annotations.VisibleForTesting import org.apache.kyuubi.{KYUUBI_VERSION, KyuubiSQLException, Logging, Utils} @@ -66,6 +68,8 @@ private[kyuubi] class EngineRef( private val timeout: Long = conf.get(ENGINE_INIT_TIMEOUT) + private val objectMapper = new ObjectMapper().registerModule(DefaultScalaModule) + // Share level of the engine private val shareLevel: ShareLevel = ShareLevel.withName(conf.get(ENGINE_SHARE_LEVEL)) @@ -378,10 +382,10 @@ private[kyuubi] class EngineRef( } } - def getAdaptivePoolId(poolSize: Int): Int = { + private def getAdaptivePoolId(poolSize: Int): Int = { val sessionThreshold = conf.get(ENGINE_POOL_ADAPTIVE_SESSION_THRESHOLD) val metricsSpace = - s"/metrics/${serverSpace}_${KYUUBI_VERSION}_${shareLevel}_${engineType}/$user" + s"/metrics/${serverSpace}_${KYUUBI_VERSION}_${shareLevel}_$engineType/$sessionUser" DiscoveryClientProvider.withDiscoveryClient(conf) { client => tryWithLock(client) { if (client.pathNonExists(metricsSpace)) { @@ -394,25 +398,23 @@ private[kyuubi] class EngineRef( } else { engineType match { case SPARK_SQL => - val engineMetricsMap = metrics.map(p => - new String(client.getData(s"$metricsSpace/$p")) - .split(";") - .map(_.split("=", 2)) - .filter(_.length == 2) - .map(kv => (kv.head, kv.last.toInt)) - .toMap) + val engineMetricsMap = metrics.map { p => + objectMapper.readValue( + new String(client.getData(s"$metricsSpace/$p")), + classOf[Map[String, Int]]) + } if (engineMetricsMap.isEmpty) { return Random.nextInt(poolSize) } else { - val sortedEngineMetrics = engineMetricsMap.sortBy { map => - ( - map.getOrElse("openSessionCount", sessionThreshold), - map.getOrElse("activeTask", 0)) - } - val candidate = sortedEngineMetrics.head - if (candidate.contains("poolId") && (candidate( - "openSessionCount") < sessionThreshold || metrics.size == poolSize)) { - candidate("poolId") + val candidate = engineMetricsMap.filter(_.contains("poolID")) + .minBy { 
map => + ( + map.getOrElse("openSessionCount", sessionThreshold), + map.getOrElse("activeTask", 0)) + } + if ((candidate.nonEmpty && candidate("openSessionCount") < sessionThreshold) || + metrics.size == poolSize) { + candidate("poolID") } else { Random.nextInt(poolSize) } From 688ea7469f22b962439fbeba9c15c1559436ec45 Mon Sep 17 00:00:00 2001 From: senmiaoliu Date: Sun, 28 Apr 2024 19:40:28 +0800 Subject: [PATCH 4/4] fix style --- docs/configuration/settings.md | 50 ++++++++++--------- .../kyuubi/engine/spark/SparkSQLEngine.scala | 4 +- .../main/scala/org/apache/kyuubi/Utils.scala | 10 ++++ .../org/apache/kyuubi/config/KyuubiConf.scala | 2 +- .../ha/client/EngineServiceDiscovery.scala | 3 +- .../org/apache/kyuubi/engine/EngineRef.scala | 6 +-- 6 files changed, 45 insertions(+), 30 deletions(-) diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index 61d6031c08c..1dd46a6e736 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -158,9 +158,10 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.engine.jdbc.connection.password | <undefined> | The password is used for connecting to server | string | 1.6.0 | | kyuubi.engine.jdbc.connection.propagateCredential | false | Whether to use the session's user and password to connect to database | boolean | 1.8.0 | | kyuubi.engine.jdbc.connection.properties || The additional properties are used for connecting to server | seq | 1.6.0 | -| kyuubi.engine.jdbc.connection.provider | <undefined> | A JDBC connection provider plugin for the Kyuubi Server to establish a connection to the JDBC URL. The configuration value should be a subclass of `org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider`. Kyuubi provides the following built-in implementations:
          • doris: For establishing Doris connections.
          • mysql: For establishing MySQL connections.
          • phoenix: For establishing Phoenix connections.
          • postgresql: For establishing PostgreSQL connections.
          • starrocks: For establishing StarRocks connections.
          • | string | 1.6.0 | +| kyuubi.engine.jdbc.connection.provider | <undefined> | A JDBC connection provider plugin for the Kyuubi Server to establish a connection to the JDBC URL. The configuration value should be a subclass of `org.apache.kyuubi.engine.jdbc.connection.JdbcConnectionProvider`. Kyuubi provides the following built-in implementations:
          • doris: For establishing Doris connections.
          • mysql: For establishing MySQL connections.
          • phoenix: For establishing Phoenix connections.
          • postgresql: For establishing PostgreSQL connections.
          • starrocks: For establishing StarRocks connections.
          • impala: For establishing Impala connections.
          • clickhouse: For establishing ClickHouse connections.
          • | string | 1.6.0 | | kyuubi.engine.jdbc.connection.url | <undefined> | The server url that engine will connect to | string | 1.6.0 | | kyuubi.engine.jdbc.connection.user | <undefined> | The user is used for connecting to server | string | 1.6.0 | +| kyuubi.engine.jdbc.deploy.mode | LOCAL | Configures the jdbc engine deploy mode, The value can be 'local', 'yarn'. In local mode, the engine operates on the same node as the KyuubiServer. In YARN mode, the engine runs within the Application Master (AM) container of YARN. | string | 1.10.0 | | kyuubi.engine.jdbc.driver.class | <undefined> | The driver class for JDBC engine connection | string | 1.6.0 | | kyuubi.engine.jdbc.extra.classpath | <undefined> | The extra classpath for the JDBC query engine, for configuring the location of the JDBC driver and etc. | string | 1.6.0 | | kyuubi.engine.jdbc.fetch.size | 1000 | The fetch size of JDBC engine | int | 1.9.0 | @@ -180,7 +181,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.engine.pool.size | -1 | The size of the engine pool. Note that, if the size is less than 1, the engine pool will not be enabled; otherwise, the size of the engine pool will be min(this, kyuubi.engine.pool.size.threshold). | int | 1.4.0 | | kyuubi.engine.pool.size.threshold | 9 | This parameter is introduced as a server-side parameter controlling the upper limit of the engine pool. | int | 1.4.0 | | kyuubi.engine.principal | <undefined> | Kerberos principal for the kyuubi engine. | string | 1.10.0 | -| kyuubi.engine.report.interval | PT1M | The check interval for engine report to the server | duration | 1.10.0 | +| kyuubi.engine.report.interval | PT1M | The interval for the engine to report metrics when using the ADAPTIVE select policy. | duration | 1.10.0 | | kyuubi.engine.session.initialize.sql || SemiColon-separated list of SQL statements to be initialized in the newly created engine session before queries. 
This configuration can not be used in JDBC url due to the limitation of Beeline/JDBC driver. | seq | 1.3.0 | | kyuubi.engine.share.level | USER | Engines will be shared in different levels, available configs are:
            • CONNECTION: the engine will not be shared but only used by the current client connection, and the engine will be launched by session user.
            • USER: the engine will be shared by all sessions created by a unique username, and the engine will be launched by session user.
            • GROUP: the engine will be shared by all sessions created by all users belong to the same primary group name. The engine will be launched by the primary group name as the effective username, so here the group name is in value of special user who is able to visit the computing resources/data of the team. It follows the [Hadoop GroupsMapping](https://reurl.cc/xE61Y5) to map user to a primary group. If the primary group is not found, it fallback to the USER level.
            • SERVER: the engine will be shared by Kyuubi servers, and the engine will be launched by Server's user.
            See also `kyuubi.engine.share.level.subdomain` and `kyuubi.engine.doAs.enabled`. | string | 1.2.0 | | kyuubi.engine.share.level.sub.domain | <undefined> | (deprecated) - Using kyuubi.engine.share.level.subdomain instead | string | 1.2.0 | @@ -207,7 +208,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.engine.trino.java.options | <undefined> | The extra Java options for the Trino query engine | string | 1.6.0 | | kyuubi.engine.trino.memory | 1g | The heap memory for the Trino query engine | string | 1.6.0 | | kyuubi.engine.trino.operation.incremental.collect | false | When true, the result will be sequentially calculated and returned to the trino. It fallback to `kyuubi.operation.incremental.collect` | boolean | 1.10.0 | -| kyuubi.engine.type | SPARK_SQL | Specify the detailed engine supported by Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are:
            • SPARK_SQL: specify this engine type will launch a Spark engine which can provide all the capacity of the Apache Spark. Note, it's a default engine type.
            • FLINK_SQL: specify this engine type will launch a Flink engine which can provide all the capacity of the Apache Flink.
            • TRINO: specify this engine type will launch a Trino engine which can provide all the capacity of the Trino.
            • HIVE_SQL: specify this engine type will launch a Hive engine which can provide all the capacity of the Hive Server2.
            • JDBC: specify this engine type will launch a JDBC engine which can forward queries to the database system through the certain JDBC driver, for now, it supports Doris, MySQL, Phoenix, PostgreSQL and StarRocks.
            • CHAT: specify this engine type will launch a Chat engine.
            | string | 1.4.0 | +| kyuubi.engine.type | SPARK_SQL | Specify the detailed engine supported by Kyuubi. The engine type bindings to SESSION scope. This configuration is experimental. Currently, available configs are:
            • SPARK_SQL: specify this engine type will launch a Spark engine which can provide all the capacity of the Apache Spark. Note, it's a default engine type.
            • FLINK_SQL: specify this engine type will launch a Flink engine which can provide all the capacity of the Apache Flink.
            • TRINO: specify this engine type will launch a Trino engine which can provide all the capacity of the Trino.
            • HIVE_SQL: specify this engine type will launch a Hive engine which can provide all the capacity of the Hive Server2.
            • JDBC: specify this engine type will launch a JDBC engine which can forward queries to the database system through the certain JDBC driver, for now, it supports Doris, MySQL, Phoenix, PostgreSQL, StarRocks, Impala and ClickHouse.
            • CHAT: specify this engine type will launch a Chat engine.
            | string | 1.4.0 | | kyuubi.engine.ui.retainedSessions | 200 | The number of SQL client sessions kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | | kyuubi.engine.ui.retainedStatements | 200 | The number of statements kept in the Kyuubi Query Engine web UI. | int | 1.4.0 | | kyuubi.engine.ui.stop.enabled | true | When true, allows Kyuubi engine to be killed from the Spark Web UI. | boolean | 1.3.0 | @@ -261,6 +262,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.frontend.rest.proxy.jetty.client.requestBufferSize | 4096 | Size of the buffer in bytes used to write requests for Jetty server used by the RESTful frontend service. | int | 1.10.0 | | kyuubi.frontend.rest.proxy.jetty.client.responseBufferSize | 4096 | Size of the buffer in bytes used to read response for Jetty server used by the RESTful frontend service. | int | 1.10.0 | | kyuubi.frontend.rest.proxy.jetty.client.timeout | PT60S | The total timeout in milliseconds for Jetty server used by the RESTful frontend service. | duration | 1.10.0 | +| kyuubi.frontend.rest.ui.enabled | true | Whether to enable Web UI when RESTful protocol is enabled | boolean | 1.10.0 | | kyuubi.frontend.ssl.keystore.algorithm | <undefined> | SSL certificate keystore algorithm. | string | 1.7.0 | | kyuubi.frontend.ssl.keystore.password | <undefined> | SSL certificate keystore password. | string | 1.7.0 | | kyuubi.frontend.ssl.keystore.path | <undefined> | SSL certificate keystore location. 
| string | 1.7.0 | @@ -339,26 +341,27 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Kubernetes -| Key | Default | Meaning | Type | Since | -|----------------------------------------------------------------------|-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.kubernetes.application.state.container | spark-kubernetes-driver | The container name to retrieve the application state from. | string | 1.8.1 | -| kyuubi.kubernetes.application.state.source | POD | The source to retrieve the application state from. The valid values are pod and container. If the source is container and there is container inside the pod with the name of kyuubi.kubernetes.application.state.container, the application state will be from the matched container state. Otherwise, the application state will be from the pod state. | string | 1.8.1 | -| kyuubi.kubernetes.authenticate.caCertFile | <undefined> | Path to the CA cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.clientCertFile | <undefined> | Path to the client cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.clientKeyFile | <undefined> | Path to the client key file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. 
do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.oauthToken | <undefined> | The OAuth token to use when authenticating against the Kubernetes API server. Note that unlike, the other authentication options, this must be the exact string value of the token to use for the authentication. | string | 1.7.0 | -| kyuubi.kubernetes.authenticate.oauthTokenFile | <undefined> | Path to the file containing the OAuth token to use when authenticating against the Kubernetes API server. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | -| kyuubi.kubernetes.context | <undefined> | The desired context from your kubernetes config file used to configure the K8s client for interacting with the cluster. | string | 1.6.0 | -| kyuubi.kubernetes.context.allow.list || The allowed kubernetes context list, if it is empty, there is no kubernetes context limitation. | set | 1.8.0 | -| kyuubi.kubernetes.master.address | <undefined> | The internal Kubernetes master (API server) address to be used for kyuubi. | string | 1.7.0 | -| kyuubi.kubernetes.namespace | default | The namespace that will be used for running the kyuubi pods and find engines. | string | 1.7.0 | -| kyuubi.kubernetes.namespace.allow.list || The allowed kubernetes namespace list, if it is empty, there is no kubernetes namespace limitation. | set | 1.8.0 | -| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.checkInterval | PT1M | Kyuubi server use guava cache as the cleanup trigger with time-based eviction, but the eviction would not happened until any get/put operation happened. This option schedule a daemon thread evict cache periodically. | duration | 1.8.1 | -| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.kind | NONE | Kyuubi server will delete the spark driver pod after the application terminates for kyuubi.kubernetes.terminatedApplicationRetainPeriod. 
Available options are NONE, ALL, COMPLETED and default value is None which means none of the pod will be deleted | string | 1.8.1 | -| kyuubi.kubernetes.spark.forciblyRewriteDriverPodName.enabled | false | Whether to forcibly rewrite Spark driver pod name with 'kyuubi--driver'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | -| kyuubi.kubernetes.spark.forciblyRewriteExecutorPodNamePrefix.enabled | false | Whether to forcibly rewrite Spark executor pod name prefix with 'kyuubi-'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter Pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | -| kyuubi.kubernetes.terminatedApplicationRetainPeriod | PT5M | The period for which the Kyuubi server retains application information after the application terminates. | duration | 1.7.1 | -| kyuubi.kubernetes.trust.certificates | false | If set to true then client can submit to kubernetes cluster only with token | boolean | 1.7.0 | +| Key | Default | Meaning | Type | Since | +|----------------------------------------------------------------------|----------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|--------| +| kyuubi.kubernetes.application.state.container | spark-kubernetes-driver | The container name to retrieve the application state from. 
| string | 1.8.1 | +| kyuubi.kubernetes.application.state.source | POD | The source to retrieve the application state from. The valid values are pod and container. If the source is container and there is container inside the pod with the name of kyuubi.kubernetes.application.state.container, the application state will be from the matched container state. Otherwise, the application state will be from the pod state. | string | 1.8.1 | +| kyuubi.kubernetes.authenticate.caCertFile | <undefined> | Path to the CA cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.clientCertFile | <undefined> | Path to the client cert file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.clientKeyFile | <undefined> | Path to the client key file for connecting to the Kubernetes API server over TLS from the kyuubi. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.oauthToken | <undefined> | The OAuth token to use when authenticating against the Kubernetes API server. Note that unlike, the other authentication options, this must be the exact string value of the token to use for the authentication. | string | 1.7.0 | +| kyuubi.kubernetes.authenticate.oauthTokenFile | <undefined> | Path to the file containing the OAuth token to use when authenticating against the Kubernetes API server. Specify this as a path as opposed to a URI (i.e. do not provide a scheme) | string | 1.7.0 | +| kyuubi.kubernetes.context | <undefined> | The desired context from your kubernetes config file used to configure the K8s client for interacting with the cluster. 
| string | 1.6.0 | +| kyuubi.kubernetes.context.allow.list || The allowed kubernetes context list, if it is empty, there is no kubernetes context limitation. | set | 1.8.0 | +| kyuubi.kubernetes.master.address | <undefined> | The internal Kubernetes master (API server) address to be used for kyuubi. | string | 1.7.0 | +| kyuubi.kubernetes.namespace | default | The namespace that will be used for running the kyuubi pods and find engines. | string | 1.7.0 | +| kyuubi.kubernetes.namespace.allow.list || The allowed kubernetes namespace list, if it is empty, there is no kubernetes namespace limitation. | set | 1.8.0 | +| kyuubi.kubernetes.spark.appUrlPattern | http://{{SPARK_DRIVER_SVC}}.{{KUBERNETES_NAMESPACE}}.svc:{{SPARK_UI_PORT}} | The pattern to generate the spark on kubernetes application UI URL. The pattern should contain placeholders for the application variables. Available placeholders are `{{SPARK_APP_ID}}`, `{{SPARK_DRIVER_SVC}}`, `{{KUBERNETES_NAMESPACE}}`, `{{KUBERNETES_CONTEXT}}` and `{{SPARK_UI_PORT}}`. | string | 1.10.0 | +| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.checkInterval | PT1M | Kyuubi server use guava cache as the cleanup trigger with time-based eviction, but the eviction would not happened until any get/put operation happened. This option schedule a daemon thread evict cache periodically. | duration | 1.8.1 | +| kyuubi.kubernetes.spark.cleanupTerminatedDriverPod.kind | NONE | Kyuubi server will delete the spark driver pod after the application terminates for kyuubi.kubernetes.terminatedApplicationRetainPeriod. Available options are NONE, ALL, COMPLETED and default value is None which means none of the pod will be deleted | string | 1.8.1 | +| kyuubi.kubernetes.spark.forciblyRewriteDriverPodName.enabled | false | Whether to forcibly rewrite Spark driver pod name with 'kyuubi--driver'. 
If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | +| kyuubi.kubernetes.spark.forciblyRewriteExecutorPodNamePrefix.enabled | false | Whether to forcibly rewrite Spark executor pod name prefix with 'kyuubi-'. If disabled, Kyuubi will try to preserve the application name while satisfying K8s' pod name policy, but some vendors may have stricter Pod name policies, thus the generated name may become illegal. | boolean | 1.8.1 | +| kyuubi.kubernetes.terminatedApplicationRetainPeriod | PT5M | The period for which the Kyuubi server retains application information after the application terminates. | duration | 1.7.1 | +| kyuubi.kubernetes.trust.certificates | false | If set to true then client can submit to kubernetes cluster only with token | boolean | 1.7.0 | ### Lineage @@ -434,6 +437,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.server.limit.batch.connections.per.user | <undefined> | Maximum kyuubi server batch connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.7.0 | | kyuubi.server.limit.batch.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server batch connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. | int | 1.7.0 | | kyuubi.server.limit.client.fetch.max.rows | <undefined> | Max rows limit for getting result row set operation. If the max rows specified by client-side is larger than the limit, request will fail directly. | int | 1.8.0 | +| kyuubi.server.limit.connections.ip.deny.list || The client ip in the deny list will be denied to connect to kyuubi server. | set | 1.9.1 | | kyuubi.server.limit.connections.per.ipaddress | <undefined> | Maximum kyuubi server connections per ipaddress. 
Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | | kyuubi.server.limit.connections.per.user | <undefined> | Maximum kyuubi server connections per user. Any user exceeding this limit will not be allowed to connect. | int | 1.6.0 | | kyuubi.server.limit.connections.per.user.ipaddress | <undefined> | Maximum kyuubi server connections per user:ipaddress combination. Any user-ipaddress exceeding this limit will not be allowed to connect. | int | 1.6.0 | diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala index 9b7e67c3fb0..40cc2913b0d 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala @@ -181,7 +181,7 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin val interval = conf.get(ENGINE_REPORT_INTERVAL) val engineSpace = conf.get(HA_NAMESPACE) val statusTracker = spark.sparkContext.statusTracker - val metricsSpace = s"/metrics$engineSpace" + val metricsSpace = Utils.concatEngineMetricsPath(engineSpace) val report: Runnable = () => { if (!shutdown.get) { val openSessionCount = backendService.sessionManager.getOpenSessionCount @@ -198,7 +198,7 @@ case class SparkSQLEngine(spark: SparkSession) extends Serverable("SparkSQLEngin client.create(metricsSpace, "PERSISTENT") } client.setData( - s"/metrics$engineSpace", + Utils.concatEngineMetricsPath(engineSpace), engineMetrics.getBytes) } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala index f0b0fea9168..82f7c8b8e4d 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala @@ 
-456,4 +456,14 @@ object Utils extends Logging { } } + /** + * Concatenates the engine-specific path to the metrics endpoint. + * + * @param path Segment of the path (e.g., engine space) to append to the "/metrics" base path. + * @return The combined metrics path, formatted as "/metrics{path}", where {path} is the input. + */ + def concatEngineMetricsPath(path: String): String = { + s"/metrics$path" + } + } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index 885da4f8c1c..e0deb018369 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -2278,7 +2278,7 @@ object KyuubiConf { val ENGINE_REPORT_INTERVAL: ConfigEntry[Long] = buildConf("kyuubi.engine.report.interval") - .doc("The check interval for engine report to the server") + .doc("The interval for the engine to report metrics when using the ADAPTIVE select policy.") .version("1.10.0") .timeConf .checkValue(_ >= Duration.ofSeconds(1).toMillis, "Minimum 1 seconds") diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/EngineServiceDiscovery.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/EngineServiceDiscovery.scala index 88b487f9ce3..43f680a87e6 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/EngineServiceDiscovery.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/EngineServiceDiscovery.scala @@ -19,6 +19,7 @@ package org.apache.kyuubi.ha.client import scala.util.control.NonFatal +import org.apache.kyuubi.Utils import org.apache.kyuubi.config.KyuubiConf.ENGINE_SHARE_LEVEL import org.apache.kyuubi.ha.HighAvailabilityConf.HA_NAMESPACE import org.apache.kyuubi.service.FrontendService @@ -34,7 +35,7 @@ class EngineServiceDiscovery( override def stop(): Unit = synchronized { if (!isServerLost.get()) { discoveryClient.deregisterService() - val path 
= s"/metrics${conf.get(HA_NAMESPACE)}" + val path = Utils.concatEngineMetricsPath(conf.get(HA_NAMESPACE)) if (discoveryClient.pathExists(path)) { discoveryClient.delete(path) } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala index 3222fafa488..429e2e2f7e8 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala @@ -384,8 +384,8 @@ private[kyuubi] class EngineRef( private def getAdaptivePoolId(poolSize: Int): Int = { val sessionThreshold = conf.get(ENGINE_POOL_ADAPTIVE_SESSION_THRESHOLD) - val metricsSpace = - s"/metrics/${serverSpace}_${KYUUBI_VERSION}_${shareLevel}_$engineType/$sessionUser" + val metricsSpace = Utils.concatEngineMetricsPath( + s"/${serverSpace}_${KYUUBI_VERSION}_${shareLevel}_$engineType/$sessionUser") DiscoveryClientProvider.withDiscoveryClient(conf) { client => tryWithLock(client) { if (client.pathNonExists(metricsSpace)) { @@ -409,7 +409,7 @@ private[kyuubi] class EngineRef( val candidate = engineMetricsMap.filter(_.contains("poolID")) .minBy { map => ( - map.getOrElse("openSessionCount", sessionThreshold), + map.getOrElse("openSessionCount", 0), map.getOrElse("activeTask", 0)) } if ((candidate.nonEmpty && candidate("openSessionCount") < sessionThreshold) ||