Skip to content

Commit a384809

Browse files
authored
chore: Update api docs for SessionContext, TaskContext, etc (#6106)
* chore: Update api docs for `SessionContext`, `TaskContext`, etc * clarify RuntimeEnv resource enforcement
1 parent 99bc393 commit a384809

File tree

5 files changed

+73
-46
lines changed

5 files changed

+73
-46
lines changed

datafusion/core/src/execution/context.rs

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! SessionContext contains methods for registering data sources and executing queries
18+
//! [`SessionContext`] contains methods for registering data sources and executing queries
1919
use crate::{
2020
catalog::catalog::{CatalogList, MemoryCatalogList},
2121
datasource::{
@@ -158,11 +158,15 @@ where
158158
}
159159
}
160160

161-
/// SessionContext is the main interface for executing queries with DataFusion. It stands for
162-
/// the connection between user and DataFusion/Ballista cluster.
163-
/// The context provides the following functionality
161+
/// Main interface for executing queries with DataFusion. Maintains
162+
/// the state of the connection between a user and an instance of the
163+
/// DataFusion engine.
164164
///
165-
/// * Create DataFrame from a CSV or Parquet data source.
165+
/// # Overview
166+
///
167+
/// [`SessionContext`] provides the following functionality:
168+
///
169+
/// * Create a DataFrame from a CSV or Parquet data source.
166170
/// * Register a CSV or Parquet data source as a table that can be referenced from a SQL query.
167171
/// * Register a custom data source that can be referenced from a SQL query.
168172
/// * Execute a SQL query
@@ -199,6 +203,20 @@ where
199203
/// # Ok(())
200204
/// # }
201205
/// ```
206+
///
207+
/// # `SessionContext`, `SessionState`, and `TaskContext`
208+
///
209+
/// A [`SessionContext`] can be created from a [`SessionConfig`] and
210+
/// stores the state for a particular query session. A single
211+
/// [`SessionContext`] can run multiple queries.
212+
///
213+
/// [`SessionState`] contains information available during query
214+
/// planning (creating [`LogicalPlan`]s and [`ExecutionPlan`]s).
215+
///
216+
/// [`TaskContext`] contains the state available during query
217+
/// execution [`ExecutionPlan::execute`]. It contains a subset of the
218+
/// information in [`SessionState`] and is created from a
219+
/// [`SessionContext`] or a [`SessionState`].
202220
#[derive(Clone)]
203221
pub struct SessionContext {
204222
/// UUID for the session
@@ -216,7 +234,7 @@ impl Default for SessionContext {
216234
}
217235

218236
impl SessionContext {
219-
/// Creates a new execution context using a default session configuration.
237+
/// Creates a new `SessionContext` using the default [`SessionConfig`].
220238
pub fn new() -> Self {
221239
Self::with_config(SessionConfig::new())
222240
}
@@ -241,19 +259,35 @@ impl SessionContext {
241259
Ok(())
242260
}
243261

244-
/// Creates a new session context using the provided session configuration.
262+
/// Creates a new `SessionContext` using the provided
263+
/// [`SessionConfig`] and a new [`RuntimeEnv`].
264+
///
265+
/// See [`Self::with_config_rt`] for more details on resource
266+
/// limits.
245267
pub fn with_config(config: SessionConfig) -> Self {
246268
let runtime = Arc::new(RuntimeEnv::default());
247269
Self::with_config_rt(config, runtime)
248270
}
249271

250-
/// Creates a new session context using the provided configuration and [`RuntimeEnv`].
272+
/// Creates a new `SessionContext` using the provided
273+
/// [`SessionConfig`] and a [`RuntimeEnv`].
274+
///
275+
/// # Resource Limits
276+
///
277+
/// By default, each new `SessionContext` creates a new
278+
/// `RuntimeEnv`, and therefore will not enforce memory or disk
279+
/// limits for queries run on different `SessionContext`s.
280+
///
281+
/// To enforce resource limits (e.g. to limit the total amount of
282+
/// memory used) across all DataFusion queries in a process,
283+
/// all `SessionContext`s should be configured with the
284+
/// same `RuntimeEnv`.
251285
pub fn with_config_rt(config: SessionConfig, runtime: Arc<RuntimeEnv>) -> Self {
252286
let state = SessionState::with_config_rt(config, runtime);
253287
Self::with_state(state)
254288
}
255289

256-
/// Creates a new session context using the provided session state.
290+
/// Creates a new `SessionContext` using the provided [`SessionState`]
257291
pub fn with_state(state: SessionState) -> Self {
258292
Self {
259293
session_id: state.session_id.clone(),
@@ -262,7 +296,7 @@ impl SessionContext {
262296
}
263297
}
264298

265-
/// Returns the time this session was created
299+
/// Returns the time this `SessionContext` was created
266300
pub fn session_start_time(&self) -> DateTime<Utc> {
267301
self.session_start_time
268302
}
@@ -282,12 +316,12 @@ impl SessionContext {
282316
)
283317
}
284318

285-
/// Return the [RuntimeEnv] used to run queries with this [SessionContext]
319+
/// Return the [RuntimeEnv] used to run queries with this `SessionContext`
286320
pub fn runtime_env(&self) -> Arc<RuntimeEnv> {
287321
self.state.read().runtime_env.clone()
288322
}
289323

290-
/// Return the `session_id` of this Session
324+
/// Returns an id that uniquely identifies this `SessionContext`.
291325
pub fn session_id(&self) -> String {
292326
self.session_id.clone()
293327
}
@@ -1205,7 +1239,7 @@ impl QueryPlanner for DefaultQueryPlanner {
12051239
/// Execution context for registering data sources and executing queries
12061240
#[derive(Clone)]
12071241
pub struct SessionState {
1208-
/// UUID for the session
1242+
/// A unique UUID that identifies the session
12091243
session_id: String,
12101244
/// Responsible for analyzing and rewriting a logical plan before optimization
12111245
analyzer: Analyzer,
@@ -1252,7 +1286,8 @@ pub fn default_session_builder(config: SessionConfig) -> SessionState {
12521286
}
12531287

12541288
impl SessionState {
1255-
/// Returns new SessionState using the provided configuration and runtime
1289+
/// Returns new [`SessionState`] using the provided
1290+
/// [`SessionConfig`] and [`RuntimeEnv`].
12561291
pub fn with_config_rt(config: SessionConfig, runtime: Arc<RuntimeEnv>) -> Self {
12571292
let catalog_list = Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
12581293
Self::with_config_rt_and_catalog_list(config, runtime, catalog_list)

datafusion/core/src/execution/mod.rs

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,30 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! This module contains the shared state available at different parts
19-
//! of query planning and execution
20-
//!
21-
//! # Runtime Environment
22-
//!
23-
//! [`runtime_env::RuntimeEnv`] can be created from a [`runtime_env::RuntimeConfig`] and
24-
//! stores state to be shared across multiple sessions. In most applications there will
25-
//! be a single [`runtime_env::RuntimeEnv`] for the entire process
26-
//!
27-
//! # Session Context
28-
//!
29-
//! [`context::SessionContext`] can be created from a [`context::SessionConfig`] and
30-
//! an optional [`runtime_env::RuntimeConfig`], and stores the state for a particular
31-
//! query session.
32-
//!
33-
//! In particular [`context::SessionState`] is the information available to query planning
34-
//!
35-
//! # Task Context
36-
//!
37-
//! [`context::TaskContext`] is typically created from a [`context::SessionContext`] or
38-
//! [`context::SessionState`], and represents the state available to query execution.
39-
//!
40-
//! In particular it is the state passed to [`crate::physical_plan::ExecutionPlan::execute`]
41-
//!
18+
//! Shared state for query planning and execution.
4219
4320
pub mod context;
4421
// backwards compatibility

datafusion/execution/src/runtime_env.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! Execution runtime environment that holds object Store, memory manager, disk manager
19-
//! and various system level components that are used during physical plan execution.
18+
//! Execution [`RuntimeEnv`] environment that manages access to object
19+
//! store, memory manager, and disk manager.
2020
2121
use crate::{
2222
disk_manager::{DiskManager, DiskManagerConfig},
@@ -32,7 +32,15 @@ use std::sync::Arc;
3232
use url::Url;
3333

3434
#[derive(Clone)]
35-
/// Execution runtime environment.
35+
/// Execution runtime environment that manages system resources such
36+
/// as memory, disk and storage.
37+
///
38+
/// A [`RuntimeEnv`] is created from a [`RuntimeConfig`] and has the
39+
/// following resource management functionality:
40+
///
41+
/// * [`MemoryPool`]: Manage memory
42+
/// * [`DiskManager`]: Manage temporary files on local disk
43+
/// * [`ObjectStoreRegistry`]: Manage mapping URLs to object store instances
3644
pub struct RuntimeEnv {
3745
/// Runtime memory management
3846
pub memory_pool: Arc<dyn MemoryPool>,

datafusion/execution/src/task.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ use crate::{
3232
};
3333

3434
/// Task Execution Context
35+
///
36+
/// A [`TaskContext`] represents the state available during a single query's
37+
/// execution.
38+
///
39+
/// # Task Context
3540
pub struct TaskContext {
3641
/// Session Id
3742
session_id: String,
@@ -98,7 +103,7 @@ impl TaskContext {
98103
))
99104
}
100105

101-
/// Return the SessionConfig associated with the Task
106+
/// Return the SessionConfig associated with this [TaskContext]
102107
pub fn session_config(&self) -> &SessionConfig {
103108
&self.session_config
104109
}

datafusion/physical-expr/src/execution_props.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ use chrono::{DateTime, TimeZone, Utc};
2020
use std::collections::HashMap;
2121
use std::sync::Arc;
2222

23-
/// Holds per-execution properties and data (such as starting timestamps, etc).
24-
/// An instance of this struct is created each time a [`LogicalPlan`] is prepared for
25-
/// execution (optimized). If the same plan is optimized multiple times, a new
26-
/// `ExecutionProps` is created each time.
23+
/// Holds per-query execution properties and data (such as statement
24+
/// starting timestamps).
25+
///
26+
/// An [`ExecutionProps`] is created each time a [`LogicalPlan`] is
27+
/// prepared for execution (optimized). If the same plan is optimized
28+
/// multiple times, a new `ExecutionProps` is created each time.
2729
///
2830
/// It is important that this structure be cheap to create as it is
2931
/// done so during predicate pruning and expression simplification

0 commit comments

Comments
 (0)