From c1de3a64160eab9d8eacc039ba4c256fc587ec50 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Tue, 20 Feb 2024 11:04:22 +0300 Subject: [PATCH] Add comment to explain rationale of using IndexMap and IndexSet --- datafusion/physical-expr/src/equivalence/properties.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/datafusion/physical-expr/src/equivalence/properties.rs b/datafusion/physical-expr/src/equivalence/properties.rs index de50ab5bd002..5a9a4f64876d 100644 --- a/datafusion/physical-expr/src/equivalence/properties.rs +++ b/datafusion/physical-expr/src/equivalence/properties.rs @@ -1118,6 +1118,11 @@ impl DependencyNode { } } +// Using `IndexMap` and `IndexSet` ensures consistent results across different executions of the same query. +// We could have used `HashSet` and `HashMap` in their place without any loss of functionality. +// As an example, if the existing orderings are `[a ASC, b ASC]` and `[c ASC]`, then for the output ordering +// both `[a ASC, b ASC, c ASC]` and `[c ASC, a ASC, b ASC]` are valid (i.e. concatenated versions of the alternative orderings). +// With `HashSet` and `HashMap`, it is not guaranteed that the same one of the 2 possible results in the example above is generated consistently. type DependencyMap = IndexMap; type Dependencies = IndexSet;