From e0042bdff74a6895bff04d9fbd42ad4c4713369b Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Wed, 20 Sep 2023 15:49:05 -0700 Subject: [PATCH 1/7] Speed up TopoSort. The main sorting algorithm implementation in TopoSort::sort_worker is 11-12x faster. Overall, the complete sequence of building the graph and sorting is about 2.5-3x faster. The overall impact in e.g. the replace_const_cells optimization pass is a ~25% speedup. End-to-end impact on our synthesis flow is about 3%. --- kernel/utils.h | 182 +++++++++++++++++++++++--------------- passes/cmds/glift.cc | 2 +- passes/opt/opt_expr.cc | 15 ++-- passes/opt/share.cc | 2 +- passes/techmap/flatten.cc | 2 +- 5 files changed, 125 insertions(+), 78 deletions(-) diff --git a/kernel/utils.h b/kernel/utils.h index d37f045ff7c..e452f380e63 100644 --- a/kernel/utils.h +++ b/kernel/utils.h @@ -31,35 +31,31 @@ YOSYS_NAMESPACE_BEGIN // A map-like container, but you can save and restore the state // ------------------------------------------------ -template> -struct stackmap -{ -private: - std::vector> backup_state; +template > struct stackmap { + private: + std::vector> backup_state; dict current_state; static T empty_tuple; -public: - stackmap() { } - stackmap(const dict &other) : current_state(other) { } + public: + stackmap() {} + stackmap(const dict &other) : current_state(other) {} - template - void operator=(const Other &other) + template stackmap &operator=(const Other &other) { - for (auto &it : current_state) + for (const auto &it : current_state) if (!backup_state.empty() && backup_state.back().count(it.first) == 0) backup_state.back()[it.first] = new T(it.second); current_state.clear(); - for (auto &it : other) + for (const auto &it : other) set(it.first, it.second); - } - bool has(const Key &k) - { - return current_state.count(k) != 0; + return *this; } + bool has(const Key &k) { return current_state.count(k) != 0; } + void set(const Key &k, const T &v) { if (!backup_state.empty() && backup_state.back().count(k) == 0) @@ -83,7 +79,7 @@ struct stackmap void reset(const Key &k) { - for (int i = GetSize(backup_state)-1; i >= 0; i--) + for (int i = GetSize(backup_state) - 1; i >= 0; i--) if (backup_state[i].count(k) != 0) { if (backup_state[i].at(k) == nullptr) current_state.erase(k); @@ -94,20 +90,14 @@ struct stackmap current_state.erase(k); } - const dict &stdmap() - { - return current_state; - } + const dict &stdmap() { return current_state; } - void save() - { - backup_state.resize(backup_state.size()+1); - } + void save() { backup_state.resize(backup_state.size() + 1); } void restore() { log_assert(!backup_state.empty()); - for (auto &it : backup_state.back()) + for (const auto &it : backup_state.back()) if (it.second != nullptr) { current_state[it.first] = *it.second; delete it.second; @@ -123,46 +113,116 @@ struct stackmap } }; - // ------------------------------------------------ // A simple class for topological sorting // ------------------------------------------------ -template> -struct TopoSort +template , typename OPS = hash_ops> class TopoSort { - bool analyze_loops, found_loops; - std::map, C> database; - std::set> loops; + public: + // We use this ordering of the edges in the adjacency matrix for + // exact compatibility with an older implementation. + struct IndirectCmp { + IndirectCmp(const std::vector &nodes) : nodes_(nodes) {} + bool operator()(int a, int b) const + { + log_assert(static_cast(a) < nodes_.size()); + log_assert(static_cast(b) < nodes_.size()); + return node_cmp_(nodes_[a], nodes_[b]); + } + const C node_cmp_; + const std::vector &nodes_; + }; + + bool analyze_loops; + std::map node_to_index; + std::vector> edges; std::vector sorted; + std::set> loops; - TopoSort() + public: + TopoSort() : indirect_cmp(nodes) { analyze_loops = true; found_loops = false; } - void node(T n) + int node(T n) + { + auto it = node_to_index.find(n); + if (it == node_to_index.end()) { + int index = static_cast(nodes.size()); + node_to_index[n] = index; + nodes.push_back(n); + edges.push_back(std::set(indirect_cmp)); + return index; + } + return it->second; + } + + void edge(int l_index, int r_index) { edges[r_index].insert(l_index); } + + void edge(T left, T right) { edge(node(left), node(right)); } + + bool has_edges(const T &node) + { + auto it = node_to_index.find(node); + return it == node_to_index.end() || !edges[it->second].empty(); + } + + bool sort() { - if (database.count(n) == 0) - database[n] = std::set(); + log_assert(GetSize(node_to_index) == GetSize(edges)); + log_assert(GetSize(nodes) == GetSize(edges)); + + loops.clear(); + sorted.clear(); + found_loops = false; + + std::vector marked_cells(edges.size(), false); + std::vector active_cells(edges.size(), false); + std::vector active_stack; + + marked_cells.reserve(edges.size()); + sorted.reserve(edges.size()); + + for (const auto &it : node_to_index) + sort_worker(it.second, marked_cells, active_cells, active_stack); + + log_assert(GetSize(sorted) == GetSize(nodes)); + + return !found_loops; } - void edge(T left, T right) + // Build the more expensive representation of edges for + // a few passes that use it directly. + std::map, C> get_database() { - node(left); - database[right].insert(left); + std::map, C> database; + for (size_t i = 0; i < nodes.size(); ++i) { + std::set converted_edge_set; + for (int other_node : edges[i]) { + converted_edge_set.insert(nodes[other_node]); + } + database.emplace(nodes[i], converted_edge_set); + } + return database; } - void sort_worker(const T &n, std::set &marked_cells, std::set &active_cells, std::vector &active_stack) + private: + bool found_loops; + std::vector nodes; + const IndirectCmp indirect_cmp; + void sort_worker(const int root_index, std::vector &marked_cells, std::vector &active_cells, std::vector &active_stack) { - if (active_cells.count(n)) { + if (active_cells[root_index]) { found_loops = true; if (analyze_loops) { std::set loop; - for (int i = GetSize(active_stack)-1; i >= 0; i--) { - loop.insert(active_stack[i]); - if (active_stack[i] == n) + for (int i = GetSize(active_stack) - 1; i >= 0; i--) { + const int index = active_stack[i]; + loop.insert(nodes[index]); + if (index == root_index) break; } loops.insert(loop); @@ -170,42 +230,24 @@ struct TopoSort return; } - if (marked_cells.count(n)) + if (marked_cells[root_index]) return; - if (!database.at(n).empty()) - { + if (!edges[root_index].empty()) { if (analyze_loops) - active_stack.push_back(n); - active_cells.insert(n); + active_stack.push_back(root_index); + active_cells[root_index] = true; - for (auto &left_n : database.at(n)) + for (int left_n : edges[root_index]) sort_worker(left_n, marked_cells, active_cells, active_stack); if (analyze_loops) active_stack.pop_back(); - active_cells.erase(n); + active_cells[root_index] = false; } - marked_cells.insert(n); - sorted.push_back(n); - } - - bool sort() - { - loops.clear(); - sorted.clear(); - found_loops = false; - - std::set marked_cells; - std::set active_cells; - std::vector active_stack; - - for (auto &it : database) - sort_worker(it.first, marked_cells, active_cells, active_stack); - - log_assert(GetSize(sorted) == GetSize(database)); - return !found_loops; + marked_cells[root_index] = true; + sorted.push_back(nodes[root_index]); } }; diff --git a/passes/cmds/glift.cc b/passes/cmds/glift.cc index 439ded07685..faa4289e3a9 100644 --- a/passes/cmds/glift.cc +++ b/passes/cmds/glift.cc @@ -582,7 +582,7 @@ struct GliftPass : public Pass { for (auto cell : module->selected_cells()) { RTLIL::Module *tpl = design->module(cell->type); if (tpl != nullptr) { - if (topo_modules.database.count(tpl) == 0) + if (!topo_modules.has_edges(tpl)) worklist.push_back(tpl); topo_modules.edge(tpl, module); non_top_modules.insert(cell->type); diff --git a/passes/opt/opt_expr.cc b/passes/opt/opt_expr.cc index 46773a344b1..7331d72a6fd 100644 --- a/passes/opt/opt_expr.cc +++ b/passes/opt/opt_expr.cc @@ -424,13 +424,18 @@ void replace_const_cells(RTLIL::Design *design, RTLIL::Module *module, bool cons for (auto &bit : sig) outbit_to_cell[bit].insert(cell); } - cells.node(cell); } - for (auto &it_right : cell_to_inbit) - for (auto &it_sigbit : it_right.second) - for (auto &it_left : outbit_to_cell[it_sigbit]) - cells.edge(it_left, it_right.first); + // Build the graph for the topological sort. + for (auto &it_right : cell_to_inbit) { + const int r_index = cells.node(it_right.first); + for (auto &it_sigbit : it_right.second) { + for (auto &it_left : outbit_to_cell[it_sigbit]) { + const int l_index = cells.node(it_left); + cells.edge(l_index, r_index); + } + } + } cells.sort(); diff --git a/passes/opt/share.cc b/passes/opt/share.cc index abef719370b..586bd9dfe9d 100644 --- a/passes/opt/share.cc +++ b/passes/opt/share.cc @@ -1032,7 +1032,7 @@ struct ShareWorker } bool found_scc = !toposort.sort(); - topo_cell_drivers = std::move(toposort.database); + topo_cell_drivers = toposort.get_database(); if (found_scc && toposort.analyze_loops) for (auto &loop : toposort.loops) { diff --git a/passes/techmap/flatten.cc b/passes/techmap/flatten.cc index 7e6df5d2c1f..f49589b826c 100644 --- a/passes/techmap/flatten.cc +++ b/passes/techmap/flatten.cc @@ -312,7 +312,7 @@ struct FlattenPass : public Pass { for (auto cell : module->selected_cells()) { RTLIL::Module *tpl = design->module(cell->type); if (tpl != nullptr) { - if (topo_modules.database.count(tpl) == 0) + if (!topo_modules.has_edges(tpl)) worklist.insert(tpl); topo_modules.edge(tpl, module); } From b9745f638b0a2ee4fd97096af7bce0198aedd8be Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Wed, 20 Sep 2023 16:20:08 -0700 Subject: [PATCH 2/7] Remove extraneous "public:". --- kernel/utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/utils.h b/kernel/utils.h index e452f380e63..4679a23f2b9 100644 --- a/kernel/utils.h +++ b/kernel/utils.h @@ -140,7 +140,6 @@ template , typename OPS = hash_ops> cla std::vector sorted; std::set> loops; - public: TopoSort() : indirect_cmp(nodes) { analyze_loops = true; @@ -213,6 +212,7 @@ template , typename OPS = hash_ops> cla bool found_loops; std::vector nodes; const IndirectCmp indirect_cmp; + void sort_worker(const int root_index, std::vector &marked_cells, std::vector &active_cells, std::vector &active_stack) { if (active_cells[root_index]) { From e38c9e01c901e0f5d0eb9fe18981a2c5c95ddf61 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 5 Oct 2023 15:24:26 -0700 Subject: [PATCH 3/7] Undo formatting changes in kernel/utils.h. --- kernel/utils.h | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/kernel/utils.h b/kernel/utils.h index 4679a23f2b9..9b64ed8e114 100644 --- a/kernel/utils.h +++ b/kernel/utils.h @@ -31,30 +31,34 @@ YOSYS_NAMESPACE_BEGIN // A map-like container, but you can save and restore the state // ------------------------------------------------ -template > struct stackmap { - private: - std::vector> backup_state; +template> +struct stackmap +{ +private: + std::vector> backup_state; dict current_state; static T empty_tuple; - public: - stackmap() {} - stackmap(const dict &other) : current_state(other) {} +public: + stackmap() { } + stackmap(const dict &other) : current_state(other) { } - template stackmap &operator=(const Other &other) + template + void operator=(const Other &other) { - for (const auto &it : current_state) + for (auto &it : current_state) if (!backup_state.empty() && backup_state.back().count(it.first) == 0) backup_state.back()[it.first] = new T(it.second); current_state.clear(); - for (const auto &it : other) + for (auto &it : other) set(it.first, it.second); - - return *this; } - bool has(const Key &k) { return current_state.count(k) != 0; } + bool has(const Key &k) + { + return current_state.count(k) != 0; + } void set(const Key &k, const T &v) { @@ -79,7 +83,7 @@ template > struct stackma void reset(const Key &k) { - for (int i = GetSize(backup_state) - 1; i >= 0; i--) + for (int i = GetSize(backup_state)-1; i >= 0; i--) if (backup_state[i].count(k) != 0) { if (backup_state[i].at(k) == nullptr) current_state.erase(k); @@ -90,14 +94,20 @@ template > struct stackma current_state.erase(k); } - const dict &stdmap() { return current_state; } + const dict &stdmap() + { + return current_state; + } - void save() { backup_state.resize(backup_state.size() + 1); } + void save() + { + backup_state.resize(backup_state.size()+1); + } void restore() { log_assert(!backup_state.empty()); - for (const auto &it : backup_state.back()) + for (auto &it : backup_state.back()) if (it.second != nullptr) { current_state[it.first] = *it.second; delete it.second; From fd7bd420b3d9f1b031adced3add1f15f29048906 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 5 Oct 2023 15:26:29 -0700 Subject: [PATCH 4/7] Add back newline. --- kernel/utils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/utils.h b/kernel/utils.h index 9b64ed8e114..255f875c3f8 100644 --- a/kernel/utils.h +++ b/kernel/utils.h @@ -123,6 +123,7 @@ struct stackmap } }; + // ------------------------------------------------ // A simple class for topological sorting // ------------------------------------------------ From 0a37c2a301f3518df0d3a9ced7b2b9477b4fe9a0 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 5 Oct 2023 17:01:42 -0700 Subject: [PATCH 5/7] Fix translation bug: The old code really checks for the presense of a node, not an edge in glift and flatten. Add back statement that inserts nodes in order in opt_expr.cc. --- kernel/utils.h | 8 +------- passes/cmds/glift.cc | 2 +- passes/opt/opt_expr.cc | 1 + passes/techmap/flatten.cc | 2 +- 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/kernel/utils.h b/kernel/utils.h index 255f875c3f8..5a1279ef01c 100644 --- a/kernel/utils.h +++ b/kernel/utils.h @@ -174,11 +174,7 @@ template , typename OPS = hash_ops> cla void edge(T left, T right) { edge(node(left), node(right)); } - bool has_edges(const T &node) - { - auto it = node_to_index.find(node); - return it == node_to_index.end() || !edges[it->second].empty(); - } + bool has_node(const T &node) { return node_to_index.find(node) != node_to_index.end(); } bool sort() { @@ -192,8 +188,6 @@ template , typename OPS = hash_ops> cla std::vector marked_cells(edges.size(), false); std::vector active_cells(edges.size(), false); std::vector active_stack; - - marked_cells.reserve(edges.size()); sorted.reserve(edges.size()); for (const auto &it : node_to_index) diff --git a/passes/cmds/glift.cc b/passes/cmds/glift.cc index faa4289e3a9..8553b02a541 100644 --- a/passes/cmds/glift.cc +++ b/passes/cmds/glift.cc @@ -582,7 +582,7 @@ struct GliftPass : public Pass { for (auto cell : module->selected_cells()) { RTLIL::Module *tpl = design->module(cell->type); if (tpl != nullptr) { - if (!topo_modules.has_edges(tpl)) + if (!topo_modules.has_node(tpl)) worklist.push_back(tpl); topo_modules.edge(tpl, module); non_top_modules.insert(cell->type); diff --git a/passes/opt/opt_expr.cc b/passes/opt/opt_expr.cc index 7331d72a6fd..3eadd35c6af 100644 --- a/passes/opt/opt_expr.cc +++ b/passes/opt/opt_expr.cc @@ -424,6 +424,7 @@ void replace_const_cells(RTLIL::Design *design, RTLIL::Module *module, bool cons for (auto &bit : sig) outbit_to_cell[bit].insert(cell); } + cells.node(cell); } // Build the graph for the topological sort. diff --git a/passes/techmap/flatten.cc b/passes/techmap/flatten.cc index f49589b826c..4ddc4aff1fa 100644 --- a/passes/techmap/flatten.cc +++ b/passes/techmap/flatten.cc @@ -312,7 +312,7 @@ struct FlattenPass : public Pass { for (auto cell : module->selected_cells()) { RTLIL::Module *tpl = design->module(cell->type); if (tpl != nullptr) { - if (!topo_modules.has_edges(tpl)) + if (!topo_modules.has_node(tpl)) worklist.insert(tpl); topo_modules.edge(tpl, module); } From 6a5799cc2ed65a7481f9d1e9142b9039f78db188 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 5 Oct 2023 17:27:26 -0700 Subject: [PATCH 6/7] Add missing initialization of node_cmp_ member. --- kernel/utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/utils.h b/kernel/utils.h index 5a1279ef01c..31a8681f199 100644 --- a/kernel/utils.h +++ b/kernel/utils.h @@ -134,7 +134,7 @@ template , typename OPS = hash_ops> cla // We use this ordering of the edges in the adjacency matrix for // exact compatibility with an older implementation. struct IndirectCmp { - IndirectCmp(const std::vector &nodes) : nodes_(nodes) {} + IndirectCmp(const std::vector &nodes) : node_cmp_(), nodes_(nodes) {} bool operator()(int a, int b) const { log_assert(static_cast(a) < nodes_.size()); From bc0df04e065d53f3f1e2053861a94de29eb9e013 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 6 Oct 2023 12:53:05 -0700 Subject: [PATCH 7/7] Get rid of double lookup in TopoSort::node(). This speeds up typical TopoSort time overall by ~10%. --- kernel/utils.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/utils.h b/kernel/utils.h index 31a8681f199..8fa223824da 100644 --- a/kernel/utils.h +++ b/kernel/utils.h @@ -159,15 +159,12 @@ template , typename OPS = hash_ops> cla int node(T n) { - auto it = node_to_index.find(n); - if (it == node_to_index.end()) { - int index = static_cast(nodes.size()); - node_to_index[n] = index; - nodes.push_back(n); - edges.push_back(std::set(indirect_cmp)); - return index; + auto rv = node_to_index.emplace(n, static_cast(nodes.size())); + if (rv.second) { + nodes.push_back(n); + edges.push_back(std::set(indirect_cmp)); } - return it->second; + return rv.first->second; } void edge(int l_index, int r_index) { edges[r_index].insert(l_index); }