Skip to content

Commit

Permalink
doc-gen: migrate scalar functions (other, conditional, and struct) documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
Chen-Yuan-Lai committed Jan 17, 2025
1 parent 0c229d7 commit c443ea5
Show file tree
Hide file tree
Showing 12 changed files with 336 additions and 430 deletions.
58 changes: 27 additions & 31 deletions datafusion/functions/src/core/arrow_cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,13 @@ use datafusion_common::{
ExprSchema, Result, ScalarValue,
};
use std::any::Any;
use std::sync::OnceLock;

use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER;
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
use datafusion_expr::{
ColumnarValue, Documentation, Expr, ExprSchemable, ScalarUDFImpl, Signature,
Volatility,
};
use datafusion_macros::user_doc;

/// Implements casting to arbitrary arrow types (rather than SQL types)
///
Expand All @@ -53,6 +52,31 @@ use datafusion_expr::{
/// ```sql
/// select arrow_cast(column_x, 'Float64')
/// ```
#[user_doc(
doc_section(label = "Other Functions"),
description = "Casts a value to a specific Arrow data type.",
syntax_example = "arrow_cast(expression, datatype)",
sql_example = r#"```sql
> select arrow_cast(-5, 'Int8') as a,
arrow_cast('foo', 'Dictionary(Int32, Utf8)') as b,
arrow_cast('bar', 'LargeUtf8') as c,
arrow_cast('2023-01-02T12:53:02', 'Timestamp(Microsecond, Some("+08:00"))') as d
;
+----+-----+-----+---------------------------+
| a | b | c | d |
+----+-----+-----+---------------------------+
| -5 | foo | bar | 2023-01-02T12:53:02+08:00 |
+----+-----+-----+---------------------------+
```"#,
argument(
name = "expression",
description = "Expression to cast. The expression can be a constant, column, or function, and any combination of operators."
),
argument(
name = "datatype",
description = "[Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) name to cast to, as a string. The format is the same as that returned by [`arrow_typeof`]"
)
)]
#[derive(Debug)]
pub struct ArrowCastFunc {
signature: Signature,
Expand Down Expand Up @@ -139,38 +163,10 @@ impl ScalarUDFImpl for ArrowCastFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_arrow_cast_doc())
self.doc()
}
}

/// Cache for the `arrow_cast` documentation; filled in by [`get_arrow_cast_doc`].
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `arrow_cast`.
///
/// The value is built inside `OnceLock::get_or_init`, so the builder closure
/// runs only when the cell is still empty; later calls return the stored value.
fn get_arrow_cast_doc() -> &'static Documentation {
    const DESCRIPTION: &str = "Casts a value to a specific Arrow data type.";
    const SYNTAX_EXAMPLE: &str = "arrow_cast(expression, datatype)";
    const SQL_EXAMPLE: &str = r#"```sql
> select arrow_cast(-5, 'Int8') as a,
arrow_cast('foo', 'Dictionary(Int32, Utf8)') as b,
arrow_cast('bar', 'LargeUtf8') as c,
arrow_cast('2023-01-02T12:53:02', 'Timestamp(Microsecond, Some("+08:00"))') as d
;
+----+-----+-----+---------------------------+
| a | b | c | d |
+----+-----+-----+---------------------------+
| -5 | foo | bar | 2023-01-02T12:53:02+08:00 |
+----+-----+-----+---------------------------+
```"#;
    const EXPRESSION_ARG: &str = "Expression to cast. The expression can be a constant, column, or function, and any combination of operators.";
    const DATATYPE_ARG: &str = "[Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) name to cast to, as a string. The format is the same as that returned by [`arrow_typeof`]";

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(DOC_SECTION_OTHER, DESCRIPTION, SYNTAX_EXAMPLE)
            .with_sql_example(SQL_EXAMPLE)
            .with_argument("expression", EXPRESSION_ARG)
            .with_argument("datatype", DATATYPE_ARG)
            .build()
    })
}

/// Returns the requested type from the arguments
fn data_type_from_args(args: &[Expr]) -> Result<DataType> {
if args.len() != 2 {
Expand Down
48 changes: 20 additions & 28 deletions datafusion/functions/src/core/arrowtypeof.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,29 @@

use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result, ScalarValue};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER;
use datafusion_expr::{ColumnarValue, Documentation};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::OnceLock;

#[user_doc(
doc_section(label = "Other Functions"),
description = "Returns the name of the underlying [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) of the expression.",
syntax_example = "arrow_typeof(expression)",
sql_example = r#"```sql
> select arrow_typeof('foo'), arrow_typeof(1);
+---------------------------+------------------------+
| arrow_typeof(Utf8("foo")) | arrow_typeof(Int64(1)) |
+---------------------------+------------------------+
| Utf8 | Int64 |
+---------------------------+------------------------+
```
"#,
argument(
name = "expression",
description = "Expression to evaluate. The expression can be a constant, column, or function, and any combination of operators."
)
)]
#[derive(Debug)]
pub struct ArrowTypeOfFunc {
signature: Signature,
Expand Down Expand Up @@ -77,31 +94,6 @@ impl ScalarUDFImpl for ArrowTypeOfFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_arrowtypeof_doc())
self.doc()
}
}

/// Cache for the `arrow_typeof` documentation; filled in by [`get_arrowtypeof_doc`].
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `arrow_typeof`.
///
/// The value is built inside `OnceLock::get_or_init`, so the builder closure
/// runs only when the cell is still empty; later calls return the stored value.
fn get_arrowtypeof_doc() -> &'static Documentation {
    const DESCRIPTION: &str = "Returns the name of the underlying [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) of the expression.";
    const SYNTAX_EXAMPLE: &str = "arrow_typeof(expression)";
    const SQL_EXAMPLE: &str = r#"```sql
> select arrow_typeof('foo'), arrow_typeof(1);
+---------------------------+------------------------+
| arrow_typeof(Utf8("foo")) | arrow_typeof(Int64(1)) |
+---------------------------+------------------------+
| Utf8 | Int64 |
+---------------------------+------------------------+
```
"#;
    const EXPRESSION_ARG: &str = "Expression to evaluate. The expression can be a constant, column, or function, and any combination of operators.";

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(DOC_SECTION_OTHER, DESCRIPTION, SYNTAX_EXAMPLE)
            .with_sql_example(SQL_EXAMPLE)
            .with_argument("expression", EXPRESSION_ARG)
            .build()
    })
}
47 changes: 19 additions & 28 deletions datafusion/functions/src/core/coalesce.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,29 @@ use arrow::compute::{and, is_not_null, is_null};
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, ExprSchema, Result};
use datafusion_expr::binary::try_type_union_resolution;
use datafusion_expr::scalar_doc_sections::DOC_SECTION_CONDITIONAL;
use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;
use itertools::Itertools;
use std::any::Any;
use std::sync::OnceLock;

#[user_doc(
doc_section(label = "Conditional Functions"),
description = "Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values.",
syntax_example = "coalesce(expression1[, ..., expression_n])",
sql_example = r#"```sql
> select coalesce(null, null, 'datafusion');
+----------------------------------------+
| coalesce(NULL,NULL,Utf8("datafusion")) |
+----------------------------------------+
| datafusion |
+----------------------------------------+
```"#,
argument(
name = "expression1, expression_n",
description = "Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary."
)
)]
#[derive(Debug)]
pub struct CoalesceFunc {
signature: Signature,
Expand Down Expand Up @@ -146,35 +162,10 @@ impl ScalarUDFImpl for CoalesceFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_coalesce_doc())
self.doc()
}
}

/// Cache for the `coalesce` documentation; filled in by [`get_coalesce_doc`].
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `coalesce`.
///
/// The value is built inside `OnceLock::get_or_init`, so the builder closure
/// runs only when the cell is still empty; later calls return the stored value.
fn get_coalesce_doc() -> &'static Documentation {
    const DESCRIPTION: &str = "Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values.";
    const SYNTAX_EXAMPLE: &str = "coalesce(expression1[, ..., expression_n])";
    const SQL_EXAMPLE: &str = r#"```sql
> select coalesce(null, null, 'datafusion');
+----------------------------------------+
| coalesce(NULL,NULL,Utf8("datafusion")) |
+----------------------------------------+
| datafusion |
+----------------------------------------+
```"#;
    const EXPRESSIONS_ARG: &str = "Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary.";

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(DOC_SECTION_CONDITIONAL, DESCRIPTION, SYNTAX_EXAMPLE)
            .with_sql_example(SQL_EXAMPLE)
            .with_argument("expression1, expression_n", EXPRESSIONS_ARG)
            .build()
    })
}

#[cfg(test)]
mod test {
use arrow::datatypes::DataType;
Expand Down
94 changes: 43 additions & 51 deletions datafusion/functions/src/core/getfield.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,52 @@ use datafusion_common::cast::{as_map_array, as_struct_array};
use datafusion_common::{
exec_err, plan_datafusion_err, plan_err, ExprSchema, Result, ScalarValue,
};
use datafusion_expr::scalar_doc_sections::DOC_SECTION_OTHER;
use datafusion_expr::{ColumnarValue, Documentation, Expr, ExprSchemable};
use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
use datafusion_macros::user_doc;
use std::any::Any;
use std::sync::{Arc, OnceLock};
use std::sync::Arc;

#[user_doc(
doc_section(label = "Other Functions"),
description = r#"Returns a field within a map or a struct with the given key.
Note: most users invoke `get_field` indirectly via field access
syntax such as `my_struct_col['field_name']` which results in a call to
`get_field(my_struct_col, 'field_name')`."#,
syntax_example = "get_field(expression1, expression2)",
sql_example = r#"```sql
> create table t (idx varchar, v varchar) as values ('data','fusion'), ('apache', 'arrow');
> select struct(idx, v) from t as c;
+-------------------------+
| struct(c.idx,c.v) |
+-------------------------+
| {c0: data, c1: fusion} |
| {c0: apache, c1: arrow} |
+-------------------------+
> select get_field((select struct(idx, v) from t), 'c0');
+-----------------------+
| struct(t.idx,t.v)[c0] |
+-----------------------+
| data |
| apache |
+-----------------------+
> select get_field((select struct(idx, v) from t), 'c1');
+-----------------------+
| struct(t.idx,t.v)[c1] |
+-----------------------+
| fusion |
| arrow |
+-----------------------+
```"#,
argument(
name = "expression1",
description = "The map or struct to retrieve a field for."
),
argument(
name = "expression2",
description = "The field name in the map or struct to retrieve data for. Must evaluate to a string."
)
)]
#[derive(Debug)]
pub struct GetFieldFunc {
signature: Signature,
Expand Down Expand Up @@ -241,54 +281,6 @@ impl ScalarUDFImpl for GetFieldFunc {
}

fn documentation(&self) -> Option<&Documentation> {
Some(get_getfield_doc())
self.doc()
}
}

/// Cache for the `get_field` documentation; filled in by [`get_getfield_doc`].
static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();

/// Returns the documentation for `get_field`.
///
/// The value is built inside `OnceLock::get_or_init`, so the builder closure
/// runs only when the cell is still empty; later calls return the stored value.
fn get_getfield_doc() -> &'static Documentation {
    const DESCRIPTION: &str = r#"Returns a field within a map or a struct with the given key.
Note: most users invoke `get_field` indirectly via field access
syntax such as `my_struct_col['field_name']` which results in a call to
`get_field(my_struct_col, 'field_name')`."#;
    const SYNTAX_EXAMPLE: &str = "get_field(expression1, expression2)";
    const SQL_EXAMPLE: &str = r#"```sql
> create table t (idx varchar, v varchar) as values ('data','fusion'), ('apache', 'arrow');
> select struct(idx, v) from t as c;
+-------------------------+
| struct(c.idx,c.v) |
+-------------------------+
| {c0: data, c1: fusion} |
| {c0: apache, c1: arrow} |
+-------------------------+
> select get_field((select struct(idx, v) from t), 'c0');
+-----------------------+
| struct(t.idx,t.v)[c0] |
+-----------------------+
| data |
| apache |
+-----------------------+
> select get_field((select struct(idx, v) from t), 'c1');
+-----------------------+
| struct(t.idx,t.v)[c1] |
+-----------------------+
| fusion |
| arrow |
+-----------------------+
```
"#;
    const CONTAINER_ARG: &str = "The map or struct to retrieve a field for.";
    const FIELD_NAME_ARG: &str = "The field name in the map or struct to retrieve data for. Must evaluate to a string.";

    DOCUMENTATION.get_or_init(|| {
        Documentation::builder(DOC_SECTION_OTHER, DESCRIPTION, SYNTAX_EXAMPLE)
            .with_sql_example(SQL_EXAMPLE)
            .with_argument("expression1", CONTAINER_ARG)
            .with_argument("expression2", FIELD_NAME_ARG)
            .build()
    })
}
Loading

0 comments on commit c443ea5

Please sign in to comment.