Skip to content

Commit dd3a003

Browse files
authored
Minor: Improve documentation of MemoryPool (#6388)
1 parent 53d6987 commit dd3a003

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

datafusion/execution/src/memory_pool/mod.rs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,30 @@ pub mod proxy;
2525

2626
pub use pool::*;
2727

28-
/// The pool of memory on which [`MemoryReservation`] record their memory reservations
28+
/// The pool of memory on which [`MemoryReservation`]s record their
29+
/// memory reservations.
30+
///
31+
/// DataFusion is a streaming query engine, processing most queries
32+
/// without buffering the entire input. However, certain operations,
33+
/// such as sorting and grouping/joining with a large number of
34+
/// distinct groups/keys, can require buffering intermediate results
35+
/// and for large datasets this can require large amounts of memory.
36+
///
37+
/// In order to avoid allocating memory until the OS or the container
38+
/// system kills the process, DataFusion operators only allocate
39+
/// memory they are able to reserve from the configured
40+
/// [`MemoryPool`]. Once the memory tracked by the pool is exhausted,
41+
/// operators must either free memory by spilling to local disk or
42+
/// error.
43+
///
44+
/// A `MemoryPool` can be shared by concurrently executing plans in
45+
/// the same process to control memory usage in a multi-tenant system.
46+
///
47+
/// The following memory pool implementations are available:
48+
///
49+
/// * [`UnboundedMemoryPool`](pool::UnboundedMemoryPool)
50+
/// * [`GreedyMemoryPool`](pool::GreedyMemoryPool)
51+
/// * [`FairSpillPool`](pool::FairSpillPool)
2952
pub trait MemoryPool: Send + Sync + std::fmt::Debug {
3053
/// Registers a new [`MemoryConsumer`]
3154
///

datafusion/execution/src/memory_pool/pool.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
use crate::memory_pool::{MemoryConsumer, MemoryPool, MemoryReservation};
1919
use datafusion_common::{DataFusionError, Result};
20+
use log::debug;
2021
use parking_lot::Mutex;
2122
use std::sync::atomic::{AtomicUsize, Ordering};
2223

@@ -45,7 +46,11 @@ impl MemoryPool for UnboundedMemoryPool {
4546
}
4647
}
4748

48-
/// A [`MemoryPool`] that implements a greedy first-come first-serve limit
49+
/// A [`MemoryPool`] that implements a greedy first-come first-serve limit.
50+
///
51+
/// This pool works well for queries that do not need to spill or have
52+
/// a single spillable operator. See [`FairSpillPool`] if there are
53+
/// multiple spillable operators that all will spill.
4954
#[derive(Debug)]
5055
pub struct GreedyMemoryPool {
5156
pool_size: usize,
@@ -55,6 +60,7 @@ pub struct GreedyMemoryPool {
5560
impl GreedyMemoryPool {
5661
/// Create a new pool that can allocate up to `pool_size` bytes
5762
pub fn new(pool_size: usize) -> Self {
63+
debug!("Created new GreedyMemoryPool(pool_size={pool_size})");
5864
Self {
5965
pool_size,
6066
used: AtomicUsize::new(0),
@@ -92,6 +98,13 @@ impl MemoryPool for GreedyMemoryPool {
9298
/// an even fraction of the available memory sans any unspillable reservations
9399
/// (i.e. `(pool_size - unspillable_memory) / num_spillable_reservations`)
94100
///
101+
/// This pool works best when you know beforehand the query has
102+
/// multiple spillable operators that will likely all need to
103+
/// spill. Sometimes it will cause spills even when there was
104+
/// sufficient memory (reserved for other operators) to avoid doing
105+
/// so.
106+
///
107+
/// ```text
95108
/// ┌───────────────────────z──────────────────────z───────────────┐
96109
/// │ z z │
97110
/// │ z z │
@@ -100,6 +113,7 @@ impl MemoryPool for GreedyMemoryPool {
100113
/// │ z z │
101114
/// │ z z │
102115
/// └───────────────────────z──────────────────────z───────────────┘
116+
/// ```
103117
///
104118
/// Unspillable memory is allocated in a first-come, first-serve fashion
105119
#[derive(Debug)]

0 commit comments

Comments
 (0)