Skip to content

Commit 02ba5ad

Browse files
committed
Improve ScalarUDFImpl docs
1 parent 0228bee commit 02ba5ad

File tree

1 file changed

+27
-13
lines changed

1 file changed

+27
-13
lines changed

datafusion/expr/src/udf.rs

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,10 @@ impl ScalarUDF {
193193
self.inner.return_type_from_exprs(args, schema, arg_types)
194194
}
195195

196+
/// Return the datatype this function returns given the input argument types.
197+
///
198+
/// See [`ScalarUDFImpl::return_type_from_args`] for more details.
199+
196200
pub fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {
197201
self.inner.return_type_from_args(args)
198202
}
@@ -433,7 +437,6 @@ impl ReturnInfo {
433437
/// # use datafusion_expr::{col, ColumnarValue, Documentation, ScalarFunctionArgs, Signature, Volatility};
434438
/// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF};
435439
/// # use datafusion_expr::scalar_doc_sections::DOC_SECTION_MATH;
436-
///
437440
/// /// This struct is for a simple UDF that adds one to an int32
438441
/// #[derive(Debug)]
439442
/// struct AddOne {
@@ -494,7 +497,12 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
494497
/// Returns this function's name
495498
fn name(&self) -> &str;
496499

497-
/// Returns the user-defined display name of the UDF given the arguments
500+
/// Returns the user-defined display name of the function, given the arguments
501+
///
502+
/// This can be used to customize the output column name generated by this
503+
/// function.
504+
///
505+
/// Defaults to `name(args[0], args[1], ...)`
498506
fn display_name(&self, args: &[Expr]) -> Result<String> {
499507
let names: Vec<String> = args.iter().map(ToString::to_string).collect();
500508
// TODO: join with ", " to standardize the formatting of Vec<Expr>, <https://github.com/apache/datafusion/issues/10364>
@@ -522,7 +530,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
522530
/// # Notes
523531
///
524532
/// If you provide an implementation for [`Self::return_type_from_args`],
525-
/// DataFusion will not call `return_type` (this function). In this case it
533+
/// DataFusion will not call `return_type` (this function). In such cases it
526534
/// is recommended to return [`DataFusionError::Internal`].
527535
///
528536
/// [`DataFusionError::Internal`]: datafusion_common::DataFusionError::Internal
@@ -538,18 +546,24 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
538546
self.return_type(arg_types)
539547
}
540548

541-
/// What [`DataType`] will be returned by this function, given the
542-
/// arguments?
543-
///
544-
/// Note most UDFs should implement [`Self::return_type`] and not this
545-
/// function. The output type for most functions only depends on the types
546-
/// of their inputs (e.g. `sqrt(f32)` is always `f32`).
549+
/// What type will be returned by this function, given the arguments?
547550
///
548551
/// By default, this function calls [`Self::return_type`] with the
549552
/// types of each argument.
550553
///
551-
/// This method can be overridden for functions that return different
552-
/// *types* based on the *values* of their arguments.
554+
/// # Notes
555+
///
556+
/// Most UDFs should implement [`Self::return_type`] and not this
557+
/// function as the output type for most functions only depends on the types
558+
/// of their inputs (e.g. `sqrt(f32)` is always `f32`).
559+
///
560+
/// This function can be used for more advanced cases such as:
561+
///
562+
/// 1. specifying nullability
563+
/// 2. return types based on the **values** of the arguments (rather than
564+
/// their **types**).
565+
///
566+
/// # Output Type based on Values
553567
///
554568
/// For example, the following two function calls get the same argument
555569
/// types (something and a `Utf8` string) but return different types based
@@ -558,9 +572,9 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
558572
/// * `arrow_cast(x, 'Int16')` --> `Int16`
559573
/// * `arrow_cast(x, 'Float32')` --> `Float32`
560574
///
561-
/// # Notes:
575+
/// # Requirements
562576
///
563-
/// This function must consistently return the same type for the same
577+
/// This function **must** consistently return the same type for the same
564578
/// logical input even if the input is simplified (e.g. it must return the same
565579
/// value for `('foo' | 'bar')` as it does for `('foobar')`).
566580
fn return_type_from_args(&self, args: ReturnTypeArgs) -> Result<ReturnInfo> {

0 commit comments

Comments
 (0)