Skip to content

Commit

Permalink
Framework for generating function docs from embedded code documentati…
Browse files Browse the repository at this point in the history
…on (#12668)

* Initial work on #12432 to allow for generation of udf docs from embedded documentation in the code

* Add missing license header.

* Fixed examples.

* Fixing a really weird RustRover/wsl ... something. No clue what happened there.

* permission change

* Cargo fmt update.

* Refactored Documentation to allow it to be used in a const.

* Add documentation for syntax_example

* Refactoring Documentation based on PR feedback.

* Cargo fmt update.

* Doc update

* Fixed copy/paste error.

* Minor text updates.

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
Omega359 and alamb authored Oct 3, 2024
1 parent 42ef58e commit 1340869
Show file tree
Hide file tree
Showing 26 changed files with 1,933 additions and 56 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ jobs:
run: taplo format --check

config-docs-check:
name: check configs.md is up-to-date
name: check configs.md and ***_functions.md is up-to-date
needs: [ linux-build-lib ]
runs-on: ubuntu-latest
container:
Expand All @@ -542,6 +542,11 @@ jobs:
# If you encounter an error, run './dev/update_config_docs.sh' and commit
./dev/update_config_docs.sh
git diff --exit-code
- name: Check if any of the ***_functions.md has been modified
run: |
# If you encounter an error, run './dev/update_function_docs.sh' and commit
./dev/update_function_docs.sh
git diff --exit-code
# Verify MSRV for the crates which are directly used by other projects:
# - datafusion
Expand Down
1 change: 1 addition & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

265 changes: 265 additions & 0 deletions datafusion/core/src/bin/print_functions_docs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use datafusion::execution::SessionStateDefaults;
use datafusion_expr::{
aggregate_doc_sections, scalar_doc_sections, window_doc_sections, AggregateUDF,
DocSection, Documentation, ScalarUDF, WindowUDF,
};
use itertools::Itertools;
use std::env::args;
use std::fmt::Write as _;

/// Prints the generated function documentation for one function type to stdout.
///
/// Expects exactly one command-line argument naming the function type:
/// `aggregate`, `scalar` or `window` (surrounding whitespace is trimmed and
/// case is ignored).
///
/// # Panics
///
/// Panics with a usage message when the argument count is wrong, and with an
/// "Unknown function type" message when the argument is not recognised.
fn main() {
    let cli_args: Vec<String> = args().collect();

    // program name + exactly one function type
    if cli_args.len() != 2 {
        panic!(
            "Usage: {} type (one of 'aggregate', 'scalar', 'window')",
            cli_args[0]
        );
    }

    let requested = cli_args[1].trim().to_lowercase();
    let rendered = match requested.as_str() {
        "aggregate" => print_aggregate_docs(),
        "scalar" => print_scalar_docs(),
        "window" => print_window_docs(),
        other => panic!("Unknown function type: {}", other),
    };

    println!("{rendered}");
}

/// Renders the documentation for the default aggregate functions, grouped by
/// the aggregate doc sections.
fn print_aggregate_docs() -> String {
    // each shared UDF is cloned so it can be owned as a boxed `DocProvider`
    let providers: Vec<Box<dyn DocProvider>> =
        SessionStateDefaults::default_aggregate_functions()
            .into_iter()
            .map(|udaf| Box::new(udaf.as_ref().clone()) as Box<dyn DocProvider>)
            .collect();

    print_docs(providers, aggregate_doc_sections::doc_sections())
}

/// Renders the documentation for the default scalar functions, grouped by the
/// scalar doc sections.
fn print_scalar_docs() -> String {
    // each shared UDF is cloned so it can be owned as a boxed `DocProvider`
    let providers: Vec<Box<dyn DocProvider>> =
        SessionStateDefaults::default_scalar_functions()
            .into_iter()
            .map(|udf| Box::new(udf.as_ref().clone()) as Box<dyn DocProvider>)
            .collect();

    print_docs(providers, scalar_doc_sections::doc_sections())
}

/// Renders the documentation for the default window functions, grouped by the
/// window doc sections.
fn print_window_docs() -> String {
    // each shared UDF is cloned so it can be owned as a boxed `DocProvider`
    let providers: Vec<Box<dyn DocProvider>> =
        SessionStateDefaults::default_window_functions()
            .into_iter()
            .map(|udwf| Box::new(udwf.as_ref().clone()) as Box<dyn DocProvider>)
            .collect();

    print_docs(providers, window_doc_sections::doc_sections())
}

/// Renders the Markdown documentation for every documented function in
/// `providers`, grouped by the sections in `doc_sections`.
///
/// For each section (in the order given) that contains at least one
/// documented function this writes:
///
/// 1. a `## <label>` header and the optional section description,
/// 2. a linked index of all function names and aliases, sorted by name,
/// 3. one entry per name in that same order. An alias gets its own heading
///    with a short "Alias of" pointer to the function it refers to; a primary
///    name gets the description, syntax example, arguments, SQL example,
///    alias list and related functions.
///
/// Functions without documentation are omitted.
fn print_docs(
    providers: Vec<Box<dyn DocProvider>>,
    doc_sections: Vec<DocSection>,
) -> String {
    let mut docs = String::new();

    // `doc_sections` is expected to contain only sections that have 'include' == true
    for doc_section in doc_sections {
        // keep only the functions documented as belonging to this section
        let section_providers: Vec<_> = providers
            .iter()
            .filter(|f| {
                matches!(
                    f.get_documentation(),
                    Some(documentation) if documentation.doc_section == doc_section
                )
            })
            .collect();

        // nothing to write for a section without any documented function
        if section_providers.is_empty() {
            continue;
        }

        // write out section header
        let _ = writeln!(docs, "## {} ", doc_section.label);

        if let Some(description) = doc_section.description {
            let _ = writeln!(docs, "{description}");
        }

        // names is a sorted list of function names and aliases since we display
        // both in the documentation
        let names = get_names_and_aliases(&section_providers);

        // write out the list of function names and aliases
        for name in &names {
            let _ = writeln!(docs, "- [{name}](#{name})");
        }

        // write out each function and alias in the order of the sorted name list
        for name in names {
            let f = section_providers
                .iter()
                .find(|f| f.get_name() == name || f.get_aliases().contains(&name))
                .expect("every listed name was collected from a provider in this section");

            // Keep the listed `name` distinct from the function's primary name:
            // shadowing it would make the alias check below always fail.
            let fname = f.get_name();
            let aliases = f.get_aliases();

            // if this name is an alias we need to display what it's an alias of
            if name != fname {
                let _ = writeln!(
                    docs,
                    "\n### `{name}`\n\n_Alias of [{fname}](#{fname})._"
                );
                continue;
            }

            // otherwise display the documentation for the function
            let Some(documentation) = f.get_documentation() else {
                unreachable!("section providers are filtered to documented functions")
            };

            // first, the name, description and syntax example
            let _ = write!(
                docs,
                "\n### `{}`\n{}\n```\n{}\n```\n",
                fname, documentation.description, documentation.syntax_example
            );

            // next, arguments
            if let Some(args) = &documentation.arguments {
                let _ = writeln!(docs, "#### Arguments\n");
                for (arg_name, arg_desc) in args {
                    let _ = writeln!(docs, "- **{arg_name}**: {arg_desc}");
                }
            }

            // next, sql example if provided
            if let Some(example) = &documentation.sql_example {
                let _ = writeln!(docs, "\n#### Example\n{example}\n");
            }

            // next, aliases; the header needs its own line so that it is not
            // fused with the first list item
            if !aliases.is_empty() {
                let _ = writeln!(docs, "#### Aliases\n");

                for alias in &aliases {
                    let _ = writeln!(docs, "- {alias}");
                }
            }

            // finally, any related udfs
            if let Some(related_udfs) = &documentation.related_udfs {
                let _ = writeln!(docs, "\n**Related functions**:");

                for related in related_udfs {
                    let _ = writeln!(docs, "- [{related}](#{related})");
                }
            }
        }
    }

    docs
}

/// Uniform view over the aggregate, scalar and window UDF types so that the
/// documentation printer can handle all of them through one interface.
trait DocProvider {
/// Returns the function's name as an owned string.
fn get_name(&self) -> String;
/// Returns the function's aliases as owned strings.
fn get_aliases(&self) -> Vec<String>;
/// Returns the function's documentation, or `None` when there is none.
fn get_documentation(&self) -> Option<&Documentation>;
}

/// Forwards name, aliases and documentation lookups to the [`AggregateUDF`].
impl DocProvider for AggregateUDF {
    fn get_name(&self) -> String {
        self.name().to_owned()
    }

    fn get_aliases(&self) -> Vec<String> {
        self.aliases().iter().map(ToString::to_string).collect()
    }

    fn get_documentation(&self) -> Option<&Documentation> {
        self.documentation()
    }
}

/// Forwards name, aliases and documentation lookups to the [`ScalarUDF`].
impl DocProvider for ScalarUDF {
    fn get_name(&self) -> String {
        self.name().to_owned()
    }

    fn get_aliases(&self) -> Vec<String> {
        self.aliases().iter().map(ToString::to_string).collect()
    }

    fn get_documentation(&self) -> Option<&Documentation> {
        self.documentation()
    }
}

/// Forwards name, aliases and documentation lookups to the [`WindowUDF`].
impl DocProvider for WindowUDF {
    fn get_name(&self) -> String {
        self.name().to_owned()
    }

    fn get_aliases(&self) -> Vec<String> {
        self.aliases().iter().map(ToString::to_string).collect()
    }

    fn get_documentation(&self) -> Option<&Documentation> {
        self.documentation()
    }
}

/// Collects the name and every alias of each function in `functions` into a
/// single list, sorted in ascending string order.
// The parameter type mirrors the `Vec<&Box<dyn DocProvider>>` the caller in
// this file holds, hence the two clippy allows.
#[allow(clippy::borrowed_box)]
#[allow(clippy::ptr_arg)]
fn get_names_and_aliases(functions: &Vec<&Box<dyn DocProvider>>) -> Vec<String> {
    functions
        .iter()
        .flat_map(|provider| {
            // the primary name first, followed by the aliases (if any)
            std::iter::once(provider.get_name()).chain(provider.get_aliases())
        })
        .sorted()
        .collect_vec()
}
10 changes: 7 additions & 3 deletions datafusion/expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ mod partition_evaluator;
mod table_source;
mod udaf;
mod udf;
mod udf_docs;
mod udwf;

pub mod conditional_expressions;
Expand Down Expand Up @@ -90,9 +91,12 @@ pub use logical_plan::*;
pub use partition_evaluator::PartitionEvaluator;
pub use sqlparser;
pub use table_source::{TableProviderFilterPushDown, TableSource, TableType};
pub use udaf::{AggregateUDF, AggregateUDFImpl, ReversedUDAF, StatisticsArgs};
pub use udf::{ScalarUDF, ScalarUDFImpl};
pub use udwf::{ReversedUDWF, WindowUDF, WindowUDFImpl};
pub use udaf::{
aggregate_doc_sections, AggregateUDF, AggregateUDFImpl, ReversedUDAF, StatisticsArgs,
};
pub use udf::{scalar_doc_sections, ScalarUDF, ScalarUDFImpl};
pub use udf_docs::{DocSection, Documentation, DocumentationBuilder};
pub use udwf::{window_doc_sections, ReversedUDWF, WindowUDF, WindowUDFImpl};
pub use window_frame::{WindowFrame, WindowFrameBound, WindowFrameUnits};

#[cfg(test)]
Expand Down
Loading

0 comments on commit 1340869

Please sign in to comment.