Skip to content

Commit

Permalink
Improve PhysicalExpr and Column documentation (apache#12457)
Browse files Browse the repository at this point in the history
* Improve PhysicalExpr and Column documentation

* Apply suggestions from code review

Co-authored-by: Chunchun Ye <[email protected]>

---------

Co-authored-by: Chunchun Ye <[email protected]>
  • Loading branch information
alamb and appletreeisyellow authored Sep 14, 2024
1 parent 88b5970 commit befac37
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 7 deletions.
23 changes: 21 additions & 2 deletions datafusion/physical-expr-common/src/physical_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,27 @@ use datafusion_expr_common::columnar_value::ColumnarValue;
use datafusion_expr_common::interval_arithmetic::Interval;
use datafusion_expr_common::sort_properties::ExprProperties;

/// See [create_physical_expr](https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html)
/// for examples of creating `PhysicalExpr` from `Expr`
/// [`PhysicalExpr`]s represent expressions such as `A + 1` or `CAST(c1 AS int)`.
///
/// `PhysicalExpr` knows its type, nullability and can be evaluated directly on
/// a [`RecordBatch`] (see [`Self::evaluate`]).
///
/// `PhysicalExpr`s are the physical counterpart to the [`Expr`]s used in logical
/// planning. They are typically created from [`Expr`] by a [`PhysicalPlanner`]
/// invoked from a higher level API.
///
/// Some important examples of `PhysicalExpr` are:
/// * [`Column`]: Represents a column at a given index in a RecordBatch
///
/// To create `PhysicalExpr` from `Expr`, see
/// * [`SessionContext::create_physical_expr`]: A high level API
/// * [`create_physical_expr`]: A low level API
///
/// [`SessionContext::create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.create_physical_expr
/// [`PhysicalPlanner`]: https://docs.rs/datafusion/latest/datafusion/physical_planner/trait.PhysicalPlanner.html
/// [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html
/// [`create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/fn.create_physical_expr.html
/// [`Column`]: https://docs.rs/datafusion/latest/datafusion/physical_expr/expressions/struct.Column.html
pub trait PhysicalExpr: Send + Sync + Display + Debug + PartialEq<dyn Any> {
/// Returns the physical expression as [`Any`] so that it can be
/// downcast to a specific implementation.
Expand Down
45 changes: 40 additions & 5 deletions datafusion/physical-expr/src/expressions/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

//! Column expression
//! Physical column reference: [`Column`]
use std::any::Any;
use std::hash::{Hash, Hasher};
Expand All @@ -33,32 +33,67 @@ use datafusion_expr::ColumnarValue;
use crate::physical_expr::{down_cast_any_ref, PhysicalExpr};

/// Represents the column at a given index in a RecordBatch
///
/// This is a physical expression that represents a column at a given index in an
/// arrow [`Schema`] / [`RecordBatch`].
///
/// Unlike the [logical `Expr::Column`], this expression is always resolved by schema index,
/// even though it does have a name. This is because the physical plan is always
/// resolved to a specific schema and there is no concept of "relation".
///
/// # Example:
/// If the schema is `a`, `b`, `c` the `Column` for `b` would be represented by
/// index 1, since `b` is the second column in the schema.
///
/// ```
/// # use datafusion_physical_expr::expressions::Column;
/// # use arrow::datatypes::{DataType, Field, Schema};
/// // Schema with columns a, b, c
/// let schema = Schema::new(vec![
/// Field::new("a", DataType::Int32, false),
/// Field::new("b", DataType::Int32, false),
/// Field::new("c", DataType::Int32, false),
/// ]);
///
/// // reference to column b is index 1
/// let column_b = Column::new_with_schema("b", &schema).unwrap();
/// assert_eq!(column_b.index(), 1);
///
/// // reference to column c is index 2
/// let column_c = Column::new_with_schema("c", &schema).unwrap();
/// assert_eq!(column_c.index(), 2);
/// ```
/// [logical `Expr::Column`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html#variant.Column
#[derive(Debug, Hash, PartialEq, Eq, Clone)]
pub struct Column {
/// The name of the column (used for debugging and display purposes).
/// The column is resolved by `index`, not by this name.
name: String,
/// The zero-based index of the column in its schema
index: usize,
}

impl Column {
/// Create a new column expression
/// Create a new column expression which references the
/// column with the given index in the schema.
pub fn new(name: &str, index: usize) -> Self {
    // Copy the borrowed name into an owned `String` so the expression
    // does not hold a borrow from the caller.
    let name = String::from(name);
    Self { name, index }
}

/// Create a new column expression based on column name and schema
/// Create a new column expression which references the
/// column with the given name in the schema
pub fn new_with_schema(name: &str, schema: &Schema) -> Result<Self> {
Ok(Column::new(name, schema.index_of(name)?))
}

/// Get the column name
/// Get the column's name
pub fn name(&self) -> &str {
    // Hand out a borrowed view of the stored name; nothing is cloned.
    self.name.as_str()
}

/// Get the column index
/// Get the column's schema index
pub fn index(&self) -> usize {
// `usize` is `Copy`, so the stored index is returned by value.
self.index
}
Expand Down

0 comments on commit befac37

Please sign in to comment.