From 2bb707cf7e80383609649c38bbe1001ff2662ab6 Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Mon, 30 Sep 2024 19:26:12 -0400 Subject: [PATCH 1/3] [AP] Analytical Solver This introduces the Analytical Solver class to the AP flow. This is an integral part of the Global Placement stage and what gives Analytical Placement its name. This PR introduces the QP_HYBRID analytical solver which uses a hybrid Clique and Star net model to optimize the quadratic HPWL objective. The code is designed to allow for other analytical solvers to be implemented and interchanged without issue. A B2B solver will be coming in a future PR. --- .../analytical_placement_flow.cpp | 55 +++- .../analytical_place/analytical_solver.cpp | 250 ++++++++++++++++++ vpr/src/analytical_place/analytical_solver.h | 205 ++++++++++++++ .../analytical_place/partial_placement.cpp | 21 ++ vpr/src/analytical_place/partial_placement.h | 12 + .../ch_intrinsics/config/golden_results.txt | 2 +- .../diffeq1/config/golden_results.txt | 2 +- .../single_ff/config/golden_results.txt | 2 +- .../vtr_reg_basic/basic_ap/task_list.txt | 2 +- .../vtr_reg_basic/task_list.txt | 2 +- 10 files changed, 537 insertions(+), 16 deletions(-) create mode 100644 vpr/src/analytical_place/analytical_solver.cpp create mode 100644 vpr/src/analytical_place/analytical_solver.h diff --git a/vpr/src/analytical_place/analytical_placement_flow.cpp b/vpr/src/analytical_place/analytical_placement_flow.cpp index 139c05e61b5..dd25b531dbd 100644 --- a/vpr/src/analytical_place/analytical_placement_flow.cpp +++ b/vpr/src/analytical_place/analytical_placement_flow.cpp @@ -6,6 +6,8 @@ */ #include "analytical_placement_flow.h" +#include +#include "analytical_solver.h" #include "ap_netlist.h" #include "atom_netlist.h" #include "full_legalizer.h" @@ -19,6 +21,40 @@ #include "vtr_assert.h" #include "vtr_time.h" +/** + * @brief A helper method to log statistics on the APNetlist. 
+ */ +static void print_ap_netlist_stats(const APNetlist& netlist) { + // Get the number of moveable and fixed blocks + size_t num_moveable_blocks = 0; + size_t num_fixed_blocks = 0; + for (APBlockId blk_id : netlist.blocks()) { + if (netlist.block_mobility(blk_id) == APBlockMobility::MOVEABLE) + num_moveable_blocks++; + else + num_fixed_blocks++; + } + // Get the fanout information of nets + size_t highest_fanout = 0; + float average_fanout = 0.f; + for (APNetId net_id : netlist.nets()) { + size_t net_fanout = netlist.net_pins(net_id).size(); + if (net_fanout > highest_fanout) + highest_fanout = net_fanout; + average_fanout += static_cast(net_fanout); + } + if (netlist.nets().size() > 0) average_fanout /= static_cast(netlist.nets().size()); // guard: an empty netlist would otherwise compute 0/0 and log NaN + // Print the statistics + VTR_LOG("Analytical Placement Netlist Statistics:\n"); + VTR_LOG("\tBlocks: %zu\n", netlist.blocks().size()); + VTR_LOG("\t\tMoveable Blocks: %zu\n", num_moveable_blocks); + VTR_LOG("\t\tFixed Blocks: %zu\n", num_fixed_blocks); + VTR_LOG("\tNets: %zu\n", netlist.nets().size()); + VTR_LOG("\t\tAverage Fanout: %.2f\n", average_fanout); + VTR_LOG("\t\tHighest Fanout: %zu\n", highest_fanout); + VTR_LOG("\tPins: %zu\n", netlist.pins().size()); +} + void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { (void)vpr_setup; // Start an overall timer for the Analytical Placement flow. @@ -38,22 +74,19 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) { APNetlist ap_netlist = gen_ap_netlist_from_atoms(atom_nlist, prepacker, constraints); + print_ap_netlist_stats(ap_netlist); // Run the Global Placer - // For now, just put all the moveable blocks at the center of the device - // grid. This will be replaced later. This is just for testing. + // For now, just runs the solver.
PartialPlacement p_placement(ap_netlist); + std::unique_ptr solver = make_analytical_solver(e_analytical_solver::QP_HYBRID, + ap_netlist); + solver->solve(0, p_placement); + + // Verify that the partial placement is valid before running the full + // legalizer. const size_t device_width = device_ctx.grid.width(); const size_t device_height = device_ctx.grid.height(); - double device_center_x = static_cast(device_width) / 2.0; - double device_center_y = static_cast(device_height) / 2.0; - for (APBlockId ap_blk_id : ap_netlist.blocks()) { - if (ap_netlist.block_mobility(ap_blk_id) != APBlockMobility::MOVEABLE) - continue; - // If the APBlock is moveable, put it on the center for the device. - p_placement.block_x_locs[ap_blk_id] = device_center_x; - p_placement.block_y_locs[ap_blk_id] = device_center_y; - } VTR_ASSERT(p_placement.verify(ap_netlist, device_width, device_height, diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp new file mode 100644 index 00000000000..730eea7ccda --- /dev/null +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -0,0 +1,250 @@ +/** + * @file + * @author Alex Singer and Robert Luo + * @date October 2024 + * @brief The definitions of the analytical solvers used in the AP flow and + * their base class. + */ + +#include "analytical_solver.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "partial_placement.h" +#include "ap_netlist.h" +#include "vpr_error.h" +#include "vtr_assert.h" +#include "vtr_vector.h" + +std::unique_ptr make_analytical_solver(e_analytical_solver solver_type, + const APNetlist& netlist) { + // Based on the solver type passed in, build the solver. 
+ switch (solver_type) { + case e_analytical_solver::QP_HYBRID: + return std::make_unique(netlist); + default: + VPR_FATAL_ERROR(VPR_ERROR_AP, + "Unrecognized analytical solver type"); + break; + } + return nullptr; +} + +AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist) + : netlist_(netlist), + blk_id_to_row_id_(netlist.blocks().size(), APRowId::INVALID()), + row_id_to_blk_id_(netlist.blocks().size(), APBlockId::INVALID()) { + // Get the number of moveable blocks in the netlist and create a unique + // row ID from [0, num_moveable_blocks) for each moveable block in the + // netlist. + num_moveable_blocks_ = 0; + size_t current_row_id = 0; + for (APBlockId blk_id : netlist.blocks()) { + if (netlist.block_mobility(blk_id) != APBlockMobility::MOVEABLE) + continue; + APRowId new_row_id = APRowId(current_row_id); + blk_id_to_row_id_[blk_id] = new_row_id; + row_id_to_blk_id_[new_row_id] = blk_id; + current_row_id++; + num_moveable_blocks_++; + } +} + +void QPHybridSolver::init_linear_system() { + // Count the number of star nodes that the netlist will have. + size_t num_star_nodes = 0; + for (APNetId net_id : netlist_.nets()) { + if (netlist_.net_pins(net_id).size() > star_num_pins_threshold) + num_star_nodes++; + } + + // Initialize the linear system with zeros. + size_t num_variables = num_moveable_blocks_ + num_star_nodes; + A_sparse = Eigen::SparseMatrix(num_variables, num_variables); + b_x = Eigen::VectorXd::Zero(num_variables); + b_y = Eigen::VectorXd::Zero(num_variables); + + // Create a list of triplets that will be used to create the sparse + // coefficient matrix. This is the method recommended by Eigen to initialize + // this matrix. + std::vector> tripletList; + // Reserve enough space for the triplets. This is just to help with + // performance. 
+ // Each net pin adds at most 4 triplets (clique nets have at most 3 pins, i.e. no more pairs than pins), so this bounds the list without reserving blocks * nets entries. + tripletList.reserve(4 * netlist_.pins().size()); + + // Lambda expression to add a connection to the linear system from the src + // to the target with the given weight. The src_row_id may represent a star + // node (so it does not represent an APBlock) or a moveable APBlock. The + // target_blk_id may be a fixed or moveable block. + auto add_connection_to_system = [&](size_t src_row_id, + APBlockId target_blk_id, + double weight) { + // Verify that this is a valid row. + VTR_ASSERT_DEBUG(src_row_id < A_sparse.rows()); + // Verify that this is a valid block id. + VTR_ASSERT_DEBUG(target_blk_id.is_valid()); + // The src_row_id is always a moveable block (rows in the matrix always + // correspond to a moveable APBlock or a star node). + if (netlist_.block_mobility(target_blk_id) == APBlockMobility::MOVEABLE) { + // If the target is also moveable, update the coefficient matrix. + size_t target_row_id = (size_t)blk_id_to_row_id_[target_blk_id]; + VTR_ASSERT_DEBUG(target_row_id < A_sparse.rows()); + tripletList.emplace_back(src_row_id, src_row_id, weight); + tripletList.emplace_back(target_row_id, target_row_id, weight); + tripletList.emplace_back(src_row_id, target_row_id, -weight); + tripletList.emplace_back(target_row_id, src_row_id, -weight); + } else { + // If the target is fixed, update the coefficient matrix and the + // constant vectors. + tripletList.emplace_back(src_row_id, src_row_id, weight); + VTR_ASSERT_DEBUG(netlist_.block_loc(target_blk_id).x >= 0); + VTR_ASSERT_DEBUG(netlist_.block_loc(target_blk_id).y >= 0); + // FIXME: These fixed block locations are aligned to the anchor of + // the tiles they are in. This is not correct. A method + // should be added to the netlist class or to a util file + // which can get a more accurate position.
+ double blk_loc_x = netlist_.block_loc(target_blk_id).x; + double blk_loc_y = netlist_.block_loc(target_blk_id).y; + b_x(src_row_id) += weight * blk_loc_x; + b_y(src_row_id) += weight * blk_loc_y; + } + }; + + // Create the connections using a hybrid connection model of the star and + // clique connection models. + size_t star_node_offset = 0; + for (APNetId net_id : netlist_.nets()) { + size_t num_pins = netlist_.net_pins(net_id).size(); + VTR_ASSERT_DEBUG(num_pins > 1); + if (num_pins > star_num_pins_threshold) { + // Create a star node and connect each block in the net to the star + // node. + // Using the weight from FastPlace + double w = static_cast(num_pins) / static_cast(num_pins - 1); + size_t star_node_id = num_moveable_blocks_ + star_node_offset; + for (APPinId pin_id : netlist_.net_pins(net_id)) { + APBlockId blk_id = netlist_.pin_block(pin_id); + add_connection_to_system(star_node_id, blk_id, w); + } + star_node_offset++; + } else { + // Create a clique connection where every block in a net connects + // exactly once to every other block in the net. + // Using the weight from FastPlace + double w = 1.0 / static_cast(num_pins - 1); + for (size_t ipin_idx = 0; ipin_idx < num_pins; ipin_idx++) { + APPinId first_pin_id = netlist_.net_pin(net_id, ipin_idx); + APBlockId first_blk_id = netlist_.pin_block(first_pin_id); + for (size_t jpin_idx = ipin_idx + 1; jpin_idx < num_pins; jpin_idx++) { + APPinId second_pin_id = netlist_.net_pin(net_id, jpin_idx); + APBlockId second_blk_id = netlist_.pin_block(second_pin_id); + // Make sure that the first node is moveable. This makes + // creating the connection easier. + if (netlist_.block_mobility(first_blk_id) == APBlockMobility::FIXED) { + // If both blocks are fixed, no connection needs to be + // made; just continue.
+ if (netlist_.block_mobility(second_blk_id) == APBlockMobility::FIXED) { + continue; + } + // The second block is moveable: connect from its row to the fixed first block. Do not swap, since first_blk_id is reused by the remaining jpin_idx iterations. + add_connection_to_system((size_t)blk_id_to_row_id_[second_blk_id], first_blk_id, w); + continue; + } + size_t first_row_id = (size_t)blk_id_to_row_id_[first_blk_id]; + add_connection_to_system(first_row_id, second_blk_id, w); + } + } + } + } + + // Make sure that the number of star nodes created matches the number of + // star nodes we pre-calculated we would have. + VTR_ASSERT_SAFE(num_star_nodes == star_node_offset); + + // Populate the A_sparse matrix using the triplets. + A_sparse.setFromTriplets(tripletList.begin(), tripletList.end()); +} + +/** + * @brief Helper method to update the linear system with anchors to the current + * partial placement. + * + * For each moveable block (with row = i) in the netlist: + * A[i][i] = A[i][i] + coeff_pseudo_anchor; + * b[i] = b[i] + pos[block(i)] * coeff_pseudo_anchor; + * Where coeff_pseudo_anchor grows with each iteration. + * + * This is basically a fast way of adding a connection between a moveable block + * and a fixed block. + */ +static inline void update_linear_system_with_anchors( + Eigen::SparseMatrix &A_sparse_diff, + Eigen::VectorXd &b_x_diff, + Eigen::VectorXd &b_y_diff, + const PartialPlacement& p_placement, + size_t num_moveable_blocks, + const vtr::vector& row_id_to_blk_id, + unsigned iteration) { + // Anchor weights grow exponentially with iteration.
+ double coeff_pseudo_anchor = 0.01 * std::exp((double)iteration/5); + for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks; row_id_idx++) { + APRowId row_id = APRowId(row_id_idx); + APBlockId blk_id = row_id_to_blk_id[row_id]; + double pseudo_w = coeff_pseudo_anchor; + A_sparse_diff.coeffRef(row_id_idx, row_id_idx) += pseudo_w; + b_x_diff(row_id_idx) += pseudo_w * p_placement.block_x_locs[blk_id]; + b_y_diff(row_id_idx) += pseudo_w * p_placement.block_y_locs[blk_id]; + } +} + +void QPHybridSolver::solve(unsigned iteration, PartialPlacement &p_placement) { + // Create a temporary linear system which will contain the original linear + // system which may be updated to include the anchor points. + Eigen::SparseMatrix A_sparse_diff = Eigen::SparseMatrix(A_sparse); + Eigen::VectorXd b_x_diff = Eigen::VectorXd(b_x); + Eigen::VectorXd b_y_diff = Eigen::VectorXd(b_y); + // In the first iteration, the original linear system is used. + // In any other iteration, use the moveable APBlocks' current placement as + // anchor-points (fixed block positions). + if (iteration != 0) { + update_linear_system_with_anchors(A_sparse_diff, b_x_diff, b_y_diff, + p_placement, num_moveable_blocks_, + row_id_to_blk_id_, iteration); + } + // Verify that the constant vectors are valid. + VTR_ASSERT_DEBUG(!b_x_diff.hasNaN() && "b_x has NaN!"); + VTR_ASSERT_DEBUG(!b_y_diff.hasNaN() && "b_y has NaN!"); + + // Set up the ConjugateGradient Solver using the coefficient matrix. + // TODO: can change cg.tolerance to increase performance when needed + // - This tolerance may need to be a function of the number of nets. + // - Instead of normalizing the fixed blocks, the tolerance can be scaled + // by the size of the device.
+ Eigen::ConjugateGradient, Eigen::Lower|Eigen::Upper> cg; + cg.compute(A_sparse_diff); + VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at compute!"); + // Use the solver to solve for x and y using the constant vectors + // TODO: Use solve with guess to make this faster. Use the previous placement + // as a guess. + Eigen::VectorXd x = cg.solve(b_x_diff); + VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_x!"); + Eigen::VectorXd y = cg.solve(b_y_diff); + VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_y!"); + + // Write the results back into the partial placement object. + for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) { + APRowId row_id = APRowId(row_id_idx); + APBlockId blk_id = row_id_to_blk_id_[row_id]; + p_placement.block_x_locs[blk_id] = x[row_id_idx]; + p_placement.block_y_locs[blk_id] = y[row_id_idx]; + } +} + diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h new file mode 100644 index 00000000000..115fbbe3fb9 --- /dev/null +++ b/vpr/src/analytical_place/analytical_solver.h @@ -0,0 +1,205 @@ +/** + * @file + * @author Alex Singer and Robert Luo + * @date October 2024 + * @brief The declarations of the Analytical Solver base class which is used + * to define the functionality of all solvers used in the AP flow. + */ + +#pragma once + +#include +#include "Eigen/Sparse" +#include "ap_netlist_fwd.h" +#include "vtr_strong_id.h" +#include "vtr_vector.h" + +// Forward declarations +class PartialPlacement; +class APNetlist; + +/** + * @brief Enumeration of all of the solvers currently implemented in VPR. + * + * NOTE: More are coming. + */ +enum class e_analytical_solver { + QP_HYBRID // A solver which optimizes the quadratic HPWL of the design. +}; + +/** + * @brief A strong ID for the rows in a matrix used during solving. 
+ * + * This gives a linearized ID for each of the moveable blocks from 0 to the + * number of moveable blocks. + */ +struct ap_row_id_tag {}; +typedef vtr::StrongId APRowId; + +/** + * @brief The Analytical Solver base class + * + * This provides functionality that all Analytical Solvers will use. + * + * It provides a standard interface that all Analytical Solvers must implement + * so they can be used interchangeably. This makes it very easy to test and + * compare different solvers. + */ +class AnalyticalSolver { +public: + virtual ~AnalyticalSolver() {} + + /** + * @brief Constructor of the base AnalyticalSolver class + * + * Initializes the internal data members of the base class which are useful + * for all solvers. + */ + AnalyticalSolver(const APNetlist &netlist); + + /** + * @brief Run an iteration of the solver using the given partial placement + * as a hint for what a "legal" solution would look like. + * + * Each solver is trying to optimize its own objective function. The solver + * is also, at the same time, trying to approach a solution that is "similar" + * to the hint provided. The larger the iteration, the more influenced the + * solver is by the hint provided. + * + * It is implied that the hint partial placement is a placement which has + * gone through some form of legalization. This allows the solver to + * optimize its objective function, while also trying to approach a more + * legal solution. + * + * @param iteration The current iteration number of the Global Placer. + * @param p_placement A "hint" to a legal solution that the solver should + * try and be like. + */ + virtual void solve(unsigned iteration, PartialPlacement &p_placement) = 0; + +protected: + + /// @brief The APNetlist the solver is optimizing over. It is implied that + /// the netlist is not being modified during global placement. + const APNetlist& netlist_; + + /// @brief The number of moveable blocks in the netlist. This is helpful + /// when allocating matrices.
+ size_t num_moveable_blocks_ = 0; + + /// @brief A lookup between a moveable APBlock and its linear ID from + /// [0, num_moveable_blocks). Fixed blocks will return an invalid row + /// ID. This is useful when knowing which row in the matrix + /// corresponds with the given APBlock. + vtr::vector blk_id_to_row_id_; + + /// @brief A reverse lookup between the linear moveable block ID and the + /// APBlock it represents. Useful when getting the results from the + /// solver. + vtr::vector row_id_to_blk_id_; +}; + +/** + * @brief A factory method which creates an Analytical Solver of the given type. + */ +std::unique_ptr make_analytical_solver(e_analytical_solver solver_type, + const APNetlist &netlist); + +/** + * @brief An Analytical Solver which tries to minimize the quadratic HPWL + * objective: + * SUM((xmax - xmin)^2 + (ymax - ymin)^2) over all nets. + * + * This is implemented using a hybrid approach, which uses both the Clique and + * Star net models. Since circuits are hypernets, a single net may connect more + * than just two blocks. The Clique model creates a clique connection between + * all blocks in the net (each block connects to each other block exactly once). + * The Star model creates a "fake", moveable star node which all blocks in the + * net connect to (but the blocks in the net do not connect to each other). + * + * The Star net model creates an extra variable for the solver to solve; however + * it allows the matrix to become more sparse (compared to the Clique net model). + * This solver uses a net pin threshold, where if the number of pins in a + * net is larger than that threshold, that net will use the Star net model; + * otherwise the Clique model is used. + * + * This technique was proposed in FastPlace, where they proved that, if the + * weights of the Star and Clique connections are set to certain values, they + * will both achieve the same answer (minimizing the quadratic HPWL objective).
+ * https://doi.org/10.1109/TCAD.2005.846365 + */ +class QPHybridSolver : public AnalyticalSolver { +private: + /// @brief The threshold for the number of pins a net will have to use the + /// Star or Clique net models. If the number of pins is larger + /// than this number, a star node will be created. + /// This number will not change the solution, but may improve performance if + /// tuned properly. If too low, then more star nodes will be created which + /// adds more variables to the linear system; if too high, then more clique + /// connections will be created, which will make the coefficient matrix less + /// sparse. + static constexpr size_t star_num_pins_threshold = 3; + + /** + * @brief Initializes the linear system of Ax = b_x and Ax = b_y based on + * the APNetlist and the fixed APBlock locations. + * + * This is the "ideal" quadratic linear system where no anchor-points are + * used. This system will be solved in the first iteration of the solver, + * then used to generate the next systems. + */ + void init_linear_system(); + + // The following variables represent the linear system without any anchor + // points. These are filled in the constructor and never modified. + // When the anchor-points are taken into consideration, the diagonal of the + // coefficient matrix and the elements of the constant vectors are updated + // and used in the solver. These are stored to prevent re-computing each + // iteration. + + /// @brief The coefficient matrix for the un-anchored linear system + /// This is expected to be sparse. This is shared between the x and y + /// dimensions. + Eigen::SparseMatrix A_sparse; + /// @brief The constant vector in the x dimension for the linear system. + Eigen::VectorXd b_x; + /// @brief The constant vector in the y dimension for the linear system. + Eigen::VectorXd b_y; + +public: + + /** + * @brief Constructor of the QPHybridSolver + * + * Initializes internal data and constructs the initial linear system. 
+ */ + QPHybridSolver(const APNetlist& inetlist) : AnalyticalSolver(inetlist) { + // Initializing the linear system only depends on the netlist and fixed + // block locations. Both are provided by the netlist, allowing this to + // be initialized in the constructor. + init_linear_system(); + } + + /** + * @brief Perform an iteration of the QP solver, storing the result into + * the partial placement object passed in. + * + * In the first iteration (iteration = 0), the partial placement object will + * not be used and the result would be the solution to the quadratic hpwl + * objective. + * + * In the following iterations (iteration > 0), the partial placement object + * will be used as anchor-points to guide the solver to a "more legal" + * solution (assuming the partial placement contains a legal solution). + * Higher iterations use stronger attraction forces between the moveable + * blocks and their anchor-points. + * + * See the base class for more information. + * + * @param iteration The current iteration of the Global Placer. + * @param p_placement A "guess" solution. The result will be written into + * this object. 
+ */ + void solve(unsigned iteration, PartialPlacement &p_placement) final; +}; + diff --git a/vpr/src/analytical_place/partial_placement.cpp b/vpr/src/analytical_place/partial_placement.cpp index aa755e36911..fc80f43b4a7 100644 --- a/vpr/src/analytical_place/partial_placement.cpp +++ b/vpr/src/analytical_place/partial_placement.cpp @@ -8,8 +8,29 @@ #include "partial_placement.h" #include #include +#include #include "ap_netlist.h" +double PartialPlacement::get_hpwl(const APNetlist& netlist) const { + double hpwl = 0.0; + for (APNetId net_id : netlist.nets()) { + double min_x = std::numeric_limits::max(); + double max_x = std::numeric_limits::lowest(); + double min_y = std::numeric_limits::max(); + double max_y = std::numeric_limits::lowest(); + for (APPinId pin_id : netlist.net_pins(net_id)) { + APBlockId blk_id = netlist.pin_block(pin_id); + min_x = std::min(min_x, block_x_locs[blk_id]); + max_x = std::max(max_x, block_x_locs[blk_id]); + min_y = std::min(min_y, block_y_locs[blk_id]); + max_y = std::max(max_y, block_y_locs[blk_id]); + } + VTR_ASSERT_SAFE(max_x >= min_x && max_y >= min_y); + hpwl += max_x - min_x + max_y - min_y; + } + return hpwl; +} + bool PartialPlacement::verify_locs(const APNetlist& netlist, size_t grid_width, size_t grid_height) const { diff --git a/vpr/src/analytical_place/partial_placement.h b/vpr/src/analytical_place/partial_placement.h index 132fd42d919..e111dd7bd79 100644 --- a/vpr/src/analytical_place/partial_placement.h +++ b/vpr/src/analytical_place/partial_placement.h @@ -134,6 +134,18 @@ struct PartialPlacement { return t_physical_tile_loc(tile_x_loc, tile_y_loc, tile_layer); } + /** + * @brief Computes the HPWL of the current partial placement solution. + * + * NOTE: This gets the HPWL of the netlist and partial placement as it + * currently appears. The user should be aware that fractional + * positions of blocks are not realistic and the netlist is ignoring + * some nets to make the analytical placement problem easier.
+ * The user should use an atom or cluster level HPWL for an accurate + * result. This is used for the Global Placer. + */ + double get_hpwl(const APNetlist& netlist) const; + /** * @brief Verify the block_x_locs and block_y_locs vectors * diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/golden_results.txt index 2bb6c04d5aa..8d0f14aa789 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/golden_results.txt @@ -1,2 +1,2 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time 
min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 9.00 vpr 72.32 MiB -1 -1 0.46 18500 3 0.09 -1 -1 33256 -1 -1 11 99 1 0 success v8.0.0-11425-g2f84f81f9 release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-27T10:26:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 74060 99 130 344 474 1 250 241 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 40 3176 21 6.79088e+06 696192 706193. 2443.58 5.46 0.516448 0.473535 26254 175826 -1 3100 60 1045 1843 1532128 1054081 2.28022 2.28022 -155.657 -2.28022 0 0 926341. 3205.33 0.26 0.78 0.26 -1 -1 0.26 0.189493 0.174147 +fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 6.00 vpr 72.12 MiB -1 -1 0.45 18396 3 0.09 -1 -1 33188 -1 -1 34 99 1 0 success v8.0.0-11429-g78275509a-dirty release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-02T13:22:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73856 99 130 240 229 1 238 264 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 72.1 MiB 0.23 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 72.1 MiB 0.23 72.1 MiB 0.23 34 2886 15 6.79088e+06 1.00605e+06 618332. 
2139.56 3.18 0.448255 0.41129 25102 150614 -1 2722 13 619 970 98287 23397 2.47058 2.47058 -148.551 -2.47058 0 0 787024. 2723.27 0.24 0.09 0.22 -1 -1 0.24 0.0502149 0.0464303 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/golden_results.txt index d72558a15fc..445629f9b23 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/golden_results.txt @@ -1,2 +1,2 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges 
crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -fixed_k6_frac_N8_22nm.xml diffeq1.v common 30.54 vpr 73.96 MiB -1 -1 0.74 23360 15 0.35 -1 -1 34316 -1 -1 43 162 0 5 success v8.0.0-11425-g2f84f81f9 release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-27T10:26:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 75732 162 96 1009 950 1 739 306 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 76 15726 25 6.79088e+06 2.5593e+06 1.25153e+06 4330.55 24.75 2.70597 2.57178 32878 320202 -1 14591 16 3409 7635 1299474 276683 21.3784 21.3784 -1758.79 -21.3784 0 0 1.55119e+06 5367.45 0.45 0.60 0.54 -1 -1 0.45 0.254682 0.242955 +fixed_k6_frac_N8_22nm.xml diffeq1.v common 20.71 vpr 73.80 MiB -1 -1 0.73 23416 15 0.36 -1 -1 34412 -1 -1 55 162 0 5 success v8.0.0-11429-g78275509a-dirty release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-02T13:22:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 75572 162 96 817 258 1 775 318 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 73.8 MiB 0.66 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 73.8 MiB 0.66 73.8 MiB 0.65 78 16251 19 6.79088e+06 2.72096e+06 1.27783e+06 4421.56 14.97 2.38249 2.26406 33454 332105 -1 15052 16 3634 8698 1424645 305832 21.8615 21.8615 -1844.65 -21.8615 0 0 
1.60349e+06 5548.42 0.47 0.67 0.58 -1 -1 0.47 0.264426 0.252264 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/golden_results.txt index 33b0707cba4..a3367f68e35 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/golden_results.txt @@ -1,2 +1,2 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration 
crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -fixed_k6_frac_N8_22nm.xml single_ff.v common 3.59 vpr 71.37 MiB -1 -1 0.13 17020 1 0.02 -1 -1 29764 -1 -1 1 2 0 0 success v8.0.0-11425-g2f84f81f9 release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-09-27T10:26:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73080 2 1 3 4 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 20 32 1 6.79088e+06 13472 414966. 1435.87 1.99 0.00160215 0.00145751 22510 95286 -1 26 1 2 2 148 27 0.691615 0.691615 -1.31306 -0.691615 0 0 503264. 1741.40 0.16 0.00 0.13 -1 -1 0.16 0.00116321 0.00112664 +fixed_k6_frac_N8_22nm.xml single_ff.v common 2.76 vpr 70.63 MiB -1 -1 0.13 16160 1 0.23 -1 -1 29588 -1 -1 1 2 0 0 success v8.0.0-11429-g78275509a-dirty release VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-02T13:22:58 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72324 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.6 MiB 0.09 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.6 MiB 0.09 70.6 MiB 0.09 20 32 1 6.79088e+06 13472 414966. 1435.87 0.60 0.00144364 0.00136058 22510 95286 -1 40 1 2 2 393 99 1.06752 1.06752 -2.06486 -1.06752 0 0 503264. 
1741.40 0.18 0.00 0.13 -1 -1 0.18 0.00126359 0.00121387 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt index f51df8eb63c..1d020ceef11 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt @@ -1,5 +1,5 @@ # This extra task list is for running just the basic_ap tasks in isolation. regression_tests/vtr_reg_basic/basic_ap/single_wire regression_tests/vtr_reg_basic/basic_ap/single_ff -#regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics +regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics regression_tests/vtr_reg_basic/basic_ap/diffeq1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt index 99aa38f49c4..64d01d32ee0 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt @@ -1,6 +1,6 @@ regression_tests/vtr_reg_basic/basic_ap/single_wire regression_tests/vtr_reg_basic/basic_ap/single_ff -#regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics +regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics regression_tests/vtr_reg_basic/basic_ap/diffeq1 regression_tests/vtr_reg_basic/basic_no_timing regression_tests/vtr_reg_basic/basic_timing From fbfe6383d4d135c017212beb1a6db5be459c568f Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Fri, 4 Oct 2024 10:25:03 -0400 Subject: [PATCH 2/3] [AP] Updated CI to Support Eigen --- .github/workflows/test.yml | 42 +++++++++++++------ vpr/CMakeLists.txt | 18 ++++++-- .../analytical_place/analytical_solver.cpp | 24 ++++++++--- vpr/src/analytical_place/analytical_solver.h | 11 ++++- .../vtr_reg_basic/basic_ap/task_list.txt | 5 --- .../vtr_reg_basic/task_list.txt | 4 -- .../ch_intrinsics/ch_intrinsics_fixed_io.xml | 0 .../basic_ap/ch_intrinsics/config/config.txt | 0 
.../ch_intrinsics/config/golden_results.txt | 0 .../basic_ap/diffeq1/config/config.txt | 0 .../diffeq1/config/golden_results.txt | 0 .../basic_ap/diffeq1/diffeq1_fixed_io.xml | 0 .../basic_ap/single_ff/config/config.txt | 0 .../single_ff/config/golden_results.txt | 0 .../basic_ap/single_ff/single_ff_fixed_io.xml | 0 .../basic_ap/single_wire/config/config.txt | 0 .../single_wire/config/golden_results.txt | 0 .../single_wire/single_wire_fixed_io.xml | 0 .../vtr_reg_strong/basic_ap/task_list.txt | 5 +++ .../vtr_reg_strong/task_list.txt | 4 ++ 20 files changed, 83 insertions(+), 30 deletions(-) delete mode 100644 vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/ch_intrinsics/ch_intrinsics_fixed_io.xml (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/ch_intrinsics/config/config.txt (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/ch_intrinsics/config/golden_results.txt (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/diffeq1/config/config.txt (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/diffeq1/config/golden_results.txt (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/diffeq1/diffeq1_fixed_io.xml (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/single_ff/config/config.txt (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/single_ff/config/golden_results.txt (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/single_ff/single_ff_fixed_io.xml (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/single_wire/config/config.txt (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => 
vtr_reg_strong}/basic_ap/single_wire/config/golden_results.txt (100%) rename vtr_flow/tasks/regression_tests/{vtr_reg_basic => vtr_reg_strong}/basic_ap/single_wire/single_wire_fixed_io.xml (100%) create mode 100644 vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/task_list.txt diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8035b7e0834..aa6d003fe6b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -139,57 +139,68 @@ jobs: { name: 'Basic', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on', - suite: 'vtr_reg_basic' + suite: 'vtr_reg_basic', + extra_pkgs: "" }, { name: 'Basic with highest assertion level', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=4 -DWITH_BLIFEXPLORER=on', - suite: 'vtr_reg_basic' + suite: 'vtr_reg_basic', + extra_pkgs: "" }, { name: 'Basic_odin', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DWITH_PARMYS=OFF -DWITH_ODIN=on', - suite: 'vtr_reg_basic_odin' + suite: 'vtr_reg_basic_odin', + extra_pkgs: "" }, { name: 'Basic with NO_GRAPHICS', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DVPR_USE_EZGL=off', - suite: 'vtr_reg_basic' + suite: 'vtr_reg_basic', + extra_pkgs: "" }, { name: 'Basic with NO_SERVER', params: '-DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DVPR_USE_EZGL=on -DVPR_USE_SERVER=off', - suite: 'vtr_reg_basic' + suite: 'vtr_reg_basic', + extra_pkgs: "" }, { name: 'Basic with CAPNPROTO disabled', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DVTR_ENABLE_CAPNPROTO=off', - suite: 'vtr_reg_basic' + suite: 'vtr_reg_basic', + extra_pkgs: "" }, { name: 'Basic with VTR_ENABLE_DEBUG_LOGGING', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DVTR_ENABLE_DEBUG_LOGGING=on', - suite: 'vtr_reg_basic' + suite: 'vtr_reg_basic', + extra_pkgs: "" }, { name: 
'Basic_odin with VTR_ENABLE_DEBUG_LOGGING', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DVTR_ENABLE_DEBUG_LOGGING=on -DWITH_PARMYS=OFF -DWITH_ODIN=on', - suite: 'vtr_reg_basic_odin' + suite: 'vtr_reg_basic_odin', + extra_pkgs: "" }, { name: 'Strong', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on', - suite: 'vtr_reg_strong' + suite: 'vtr_reg_strong', + extra_pkgs: "libeigen3-dev" }, { name: 'Strong_odin', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DWITH_PARMYS=OFF -DWITH_ODIN=on', - suite: 'vtr_reg_strong_odin' + suite: 'vtr_reg_strong_odin', + extra_pkgs: "" }, { name: 'Valgrind Memory', params: '-DCMAKE_COMPILE_WARNING_AS_ERROR=on -DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DWITH_ODIN=on', - suite: 'vtr_reg_valgrind_small' + suite: 'vtr_reg_valgrind_small', + extra_pkgs: "" } ] name: 'R: ${{ matrix.name }}' @@ -198,10 +209,17 @@ jobs: - uses: actions/setup-python@v5 with: python-version: 3.10.10 + - uses: actions/checkout@v4 with: submodules: 'true' - - run: ./.github/scripts/install_dependencies.sh + + - name: Install dependencies + run: ./.github/scripts/install_dependencies.sh + + - name: Install external libraries + run: sudo apt install -y ${{ matrix.extra_pkgs }} + if: ${{ matrix.extra_pkgs }} - uses: hendrikmuhs/ccache-action@v1.2 diff --git a/vpr/CMakeLists.txt b/vpr/CMakeLists.txt index 0cbaec216a6..4e3ccc0b12c 100644 --- a/vpr/CMakeLists.txt +++ b/vpr/CMakeLists.txt @@ -76,15 +76,27 @@ add_library(libvpr STATIC target_include_directories(libvpr PUBLIC ${LIB_INCLUDE_DIRS}) + +# Find if Eigen is installed. Eigen is used within the Analytical Solver of the +# Analytical Placement flow. If Eigen is not installed, certain solvers cannot +# be used. 
+find_package(Eigen3 3.3 NO_MODULE) +if (TARGET Eigen3::Eigen) + target_link_libraries (libvpr Eigen3::Eigen) + target_compile_definitions(libvpr PUBLIC -DEIGEN_INSTALLED) + message(STATUS "Eigen3: Found") +else () + message(STATUS "Eigen3: Not Found. Some features may be disabled.") +endif (TARGET Eigen3::Eigen) + #VPR_ANALYTIC_PLACE is initialized in the root CMakeLists -#Check Eigen dependency +# NOTE: This is the cluster-level Analytical Placement which existed before the +# flat Analytical Placement flow. if(${VPR_ANALYTIC_PLACE}) message(STATUS "VPR Analytic Placement: Requested") - find_package(Eigen3 3.3 NO_MODULE) if (TARGET Eigen3::Eigen) message(STATUS "VPR Analytic Placement dependency (Eigen3): Found") message(STATUS "VPR Analytic Placement: Enabled") - target_link_libraries (libvpr Eigen3::Eigen) target_compile_definitions(libvpr PUBLIC -DENABLE_ANALYTIC_PLACE) else () message(STATUS "VPR Analytic Placement dependency (Eigen3): Not Found (Download manually with sudo apt install libeigen3-dev, and rebuild)") diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index 730eea7ccda..c1bd982965c 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -7,11 +7,6 @@ */ #include "analytical_solver.h" -#include -#include -#include -#include -#include #include #include #include @@ -23,12 +18,27 @@ #include "vtr_assert.h" #include "vtr_vector.h" +#ifdef EIGEN_INSTALLED +#include +#include +#include +#include +#include +#endif // EIGEN_INSTALLED + std::unique_ptr make_analytical_solver(e_analytical_solver solver_type, const APNetlist& netlist) { // Based on the solver type passed in, build the solver. 
switch (solver_type) { case e_analytical_solver::QP_HYBRID: +#ifdef EIGEN_INSTALLED return std::make_unique(netlist); +#else + (void)netlist; + VPR_FATAL_ERROR(VPR_ERROR_AP, + "QP Hybrid Solver requires the Eigen library"); + break; +#endif // EIGEN_INSTALLED default: VPR_FATAL_ERROR(VPR_ERROR_AP, "Unrecognized analytical solver type"); @@ -57,6 +67,8 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist) } } +#ifdef EIGEN_INSTALLED + void QPHybridSolver::init_linear_system() { // Count the number of star nodes that the netlist will have. size_t num_star_nodes = 0; @@ -248,3 +260,5 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement &p_placement) { } } +#endif // EIGEN_INSTALLED + diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h index 115fbbe3fb9..7cb37bec6c8 100644 --- a/vpr/src/analytical_place/analytical_solver.h +++ b/vpr/src/analytical_place/analytical_solver.h @@ -9,11 +9,14 @@ #pragma once #include -#include "Eigen/Sparse" #include "ap_netlist_fwd.h" #include "vtr_strong_id.h" #include "vtr_vector.h" +#ifdef EIGEN_INSTALLED +#include "Eigen/Sparse" +#endif // EIGEN_INSTALLED + // Forward declarations class PartialPlacement; class APNetlist; @@ -105,6 +108,10 @@ class AnalyticalSolver { std::unique_ptr make_analytical_solver(e_analytical_solver solver_type, const APNetlist &netlist); +// The Eigen library is used to solve matrix equations in the following solvers. +// The solver cannot be built if Eigen is not installed. 
+#ifdef EIGEN_INSTALLED + /** * @brief An Analytical Solver which tries to minimize the quadratic HPWL * objective: @@ -203,3 +210,5 @@ class QPHybridSolver : public AnalyticalSolver { void solve(unsigned iteration, PartialPlacement &p_placement) final; }; +#endif // EIGEN_INSTALLED + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt deleted file mode 100644 index 1d020ceef11..00000000000 --- a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/task_list.txt +++ /dev/null @@ -1,5 +0,0 @@ -# This extra task list is for running just the basic_ap tasks in isolation. -regression_tests/vtr_reg_basic/basic_ap/single_wire -regression_tests/vtr_reg_basic/basic_ap/single_ff -regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics -regression_tests/vtr_reg_basic/basic_ap/diffeq1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt index 64d01d32ee0..386b06be76d 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_basic/task_list.txt @@ -1,7 +1,3 @@ -regression_tests/vtr_reg_basic/basic_ap/single_wire -regression_tests/vtr_reg_basic/basic_ap/single_ff -regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics -regression_tests/vtr_reg_basic/basic_ap/diffeq1 regression_tests/vtr_reg_basic/basic_no_timing regression_tests/vtr_reg_basic/basic_timing regression_tests/vtr_reg_basic/basic_timing_no_sdc diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/ch_intrinsics_fixed_io.xml b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/ch_intrinsics_fixed_io.xml similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/ch_intrinsics_fixed_io.xml rename to 
vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/ch_intrinsics_fixed_io.xml diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/config.txt similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/config.txt rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/config.txt diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/golden_results.txt similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/ch_intrinsics/config/golden_results.txt rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics/config/golden_results.txt diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/config.txt similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/config.txt rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/config.txt diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/golden_results.txt similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/config/golden_results.txt rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/config/golden_results.txt diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/diffeq1_fixed_io.xml b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/diffeq1_fixed_io.xml similarity index 100% rename from 
vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/diffeq1/diffeq1_fixed_io.xml rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/diffeq1/diffeq1_fixed_io.xml diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_ff/config/config.txt similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/config.txt rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_ff/config/config.txt diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_ff/config/golden_results.txt similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/config/golden_results.txt rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_ff/config/golden_results.txt diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/single_ff_fixed_io.xml b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_ff/single_ff_fixed_io.xml similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_ff/single_ff_fixed_io.xml rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_ff/single_ff_fixed_io.xml diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_wire/config/config.txt similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/config.txt rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_wire/config/config.txt diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/golden_results.txt 
b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_wire/config/golden_results.txt similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/config/golden_results.txt rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_wire/config/golden_results.txt diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/single_wire_fixed_io.xml b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_wire/single_wire_fixed_io.xml similarity index 100% rename from vtr_flow/tasks/regression_tests/vtr_reg_basic/basic_ap/single_wire/single_wire_fixed_io.xml rename to vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/single_wire/single_wire_fixed_io.xml diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/task_list.txt new file mode 100644 index 00000000000..d6c7b6615b4 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/basic_ap/task_list.txt @@ -0,0 +1,5 @@ +# This extra task list is for running just the basic_ap tasks in isolation. 
+regression_tests/vtr_reg_strong/basic_ap/single_wire +regression_tests/vtr_reg_strong/basic_ap/single_ff +regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics +regression_tests/vtr_reg_strong/basic_ap/diffeq1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt index 73f97a8867b..ed092987917 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt @@ -1,3 +1,7 @@ +regression_tests/vtr_reg_strong/basic_ap/single_wire +regression_tests/vtr_reg_strong/basic_ap/single_ff +regression_tests/vtr_reg_strong/basic_ap/ch_intrinsics +regression_tests/vtr_reg_strong/basic_ap/diffeq1 regression_tests/vtr_reg_strong/strong_absorb_buffers regression_tests/vtr_reg_strong/strong_analysis_only regression_tests/vtr_reg_strong/strong_analytic_placer From eabb6e33c247ee888a1e2bae5c88f1e573417b1e Mon Sep 17 00:00:00 2001 From: AlexandreSinger Date: Sat, 5 Oct 2024 14:36:13 -0400 Subject: [PATCH 3/3] [AP] Updated Analytical Solver Documentation --- .../analytical_place/analytical_solver.cpp | 144 +++++++++++++----- vpr/src/analytical_place/analytical_solver.h | 6 +- 2 files changed, 105 insertions(+), 45 deletions(-) diff --git a/vpr/src/analytical_place/analytical_solver.cpp b/vpr/src/analytical_place/analytical_solver.cpp index c1bd982965c..e15f510a20b 100644 --- a/vpr/src/analytical_place/analytical_solver.cpp +++ b/vpr/src/analytical_place/analytical_solver.cpp @@ -69,6 +69,74 @@ AnalyticalSolver::AnalyticalSolver(const APNetlist& netlist) #ifdef EIGEN_INSTALLED +/** + * @brief Helper method to add a connection between a src moveable node and a + * target APBlock with the given weight. This updates the tripleList and + * the constant vectors with the values necessary to solve the quadratic + * objective function. 
+ * + * The A_sparse matrix is square and symmetric, so the use of the "row_id" as + * input is arbitrary; it could easily have been "src_col_id". + * + * The src_row_id always represents a moveable node in the linear system. It can + * represent a moveable APBlock or a star node. + * + * The target_blk_id may be either a moveable or fixed block. + * + * If the target block (t) is moveable, with source row s: + * A[s][s] = A[s][s] + weight + * A[t][t] = A[t][t] + weight + * A[s][t] = A[s][t] - weight + * A[t][s] = A[t][s] - weight + * If the target block is fixed: + * A[s][s] = A[s][s] + weight + * b[s] = b[s] + pos[block(t)] * weight + * + * These update equations come from taking the partial derivatives of the + * quadratic objective function w.r.t. the moveable block locations. This is + * explained in detail in the FastPlace paper. + */ +static inline void add_connection_to_system(size_t src_row_id, + APBlockId target_blk_id, + double weight, + std::vector>& tripletList, + Eigen::VectorXd& b_x, + Eigen::VectorXd& b_y, + Eigen::SparseMatrix& A_sparse, + vtr::vector& blk_id_to_row_id, + const APNetlist& netlist) { + // Verify that this is a valid row. + VTR_ASSERT_DEBUG(src_row_id < (size_t)A_sparse.rows()); + VTR_ASSERT_DEBUG(A_sparse.rows() == A_sparse.cols()); + // Verify that this is a valid block id. + VTR_ASSERT_DEBUG(target_blk_id.is_valid()); + // The src_row_id is always a moveable block (rows in the matrix always + // correspond to a moveable APBlock or a star node). + if (netlist.block_mobility(target_blk_id) == APBlockMobility::MOVEABLE) { + // If the target is also moveable, update the coefficient matrix.
+ size_t target_row_id = (size_t)blk_id_to_row_id[target_blk_id]; + VTR_ASSERT_DEBUG(target_row_id < (size_t)A_sparse.rows()); + tripletList.emplace_back(src_row_id, src_row_id, weight); + tripletList.emplace_back(target_row_id, target_row_id, weight); + tripletList.emplace_back(src_row_id, target_row_id, -weight); + tripletList.emplace_back(target_row_id, src_row_id, -weight); + } else { + // If the target is fixed, update the coefficient matrix and the + // constant vectors. + tripletList.emplace_back(src_row_id, src_row_id, weight); + VTR_ASSERT_DEBUG(netlist.block_loc(target_blk_id).x >= 0); + VTR_ASSERT_DEBUG(netlist.block_loc(target_blk_id).y >= 0); + // FIXME: These fixed block locations are aligned to the anchor of + // the tiles they are in. This is not correct. A method + // should be added to the netlist class or to a util file + // which can get a more accurate position. + double blk_loc_x = netlist.block_loc(target_blk_id).x; + double blk_loc_y = netlist.block_loc(target_blk_id).y; + b_x(src_row_id) += weight * blk_loc_x; + b_y(src_row_id) += weight * blk_loc_y; + } +} + void QPHybridSolver::init_linear_system() { // Count the number of star nodes that the netlist will have. size_t num_star_nodes = 0; @@ -86,50 +154,20 @@ void QPHybridSolver::init_linear_system() { // Create a list of triplets that will be used to create the sparse // coefficient matrix. This is the method recommended by Eigen to initialize // this matrix. + // A triplet represents a non-zero entry in a sparse matrix: + // (row index, col index, value) + // Where triplets at the same (row index, col index) are summed together. std::vector> tripletList; // Reserve enough space for the triplets. This is just to help with // performance. + // This is an over-estimate that assumes that each net connects to all + // moveable blocks using a star node. + // TODO: This can be made more space-efficient by getting the average fanout + // of all nets in the APNetlist.
Ideally this should be not enough + // space, but be within a constant factor. size_t num_nets = netlist_.nets().size(); tripletList.reserve(num_moveable_blocks_ * num_nets); - // Lambda expression to add a connection to the linear system from the src - // to the target with the given weight. The src_row_id may represent a star - // node (so it does not represent an APBlock) or a moveable APBlock. The - // target_blk_id may be a fixed or moveable block. - auto add_connection_to_system = [&](size_t src_row_id, - APBlockId target_blk_id, - double weight) { - // Verify that this is a valid row. - VTR_ASSERT_DEBUG(src_row_id < A_sparse.rows()); - // Verify that this is a valid block id. - VTR_ASSERT_DEBUG(target_blk_id.is_valid()); - // The src_row_id is always a moveable block (rows in the matrix always - // coorespond to a moveable APBlock or a star node. - if (netlist_.block_mobility(target_blk_id) == APBlockMobility::MOVEABLE) { - // If the target is also moveable, update the coefficient matrix. - size_t target_row_id = (size_t)blk_id_to_row_id_[target_blk_id]; - VTR_ASSERT_DEBUG(target_row_id < A_sparse.rows()); - tripletList.emplace_back(src_row_id, src_row_id, weight); - tripletList.emplace_back(target_row_id, target_row_id, weight); - tripletList.emplace_back(src_row_id, target_row_id, -weight); - tripletList.emplace_back(target_row_id, src_row_id, -weight); - } else { - // If the target is fixed, update the coefficient matrix and the - // constant vectors. - tripletList.emplace_back(src_row_id, src_row_id, weight); - VTR_ASSERT_DEBUG(netlist_.block_loc(target_blk_id).x >= 0); - VTR_ASSERT_DEBUG(netlist_.block_loc(target_blk_id).y >= 0); - // FIXME: These fixed block locations are aligned to the anchor of - // the tiles they are in. This is not correct. A method - // should be added to the netlist class or to a util file - // which can get a more accurate position. 
- double blk_loc_x = netlist_.block_loc(target_blk_id).x; - double blk_loc_y = netlist_.block_loc(target_blk_id).y; - b_x(src_row_id) += weight * blk_loc_x; - b_y(src_row_id) += weight * blk_loc_y; - } - }; - // Create the connections using a hybrid connection model of the star and // clique connnection models. size_t star_node_offset = 0; @@ -140,17 +178,21 @@ void QPHybridSolver::init_linear_system() { // Create a star node and connect each block in the net to the star // node. // Using the weight from FastPlace + // TODO: Investigate other weight terms. double w = static_cast(num_pins) / static_cast(num_pins - 1); size_t star_node_id = num_moveable_blocks_ + star_node_offset; for (APPinId pin_id : netlist_.net_pins(net_id)) { APBlockId blk_id = netlist_.pin_block(pin_id); - add_connection_to_system(star_node_id, blk_id, w); + add_connection_to_system(star_node_id, blk_id, w, tripletList, + b_x, b_y, A_sparse, blk_id_to_row_id_, + netlist_); } star_node_offset++; } else { // Create a clique connection where every block in a net connects // exactly once to every other block in the net. // Using the weight from FastPlace + // TODO: Investigate other weight terms. double w = 1.0 / static_cast(num_pins - 1); for (size_t ipin_idx = 0; ipin_idx < num_pins; ipin_idx++) { APPinId first_pin_id = netlist_.net_pin(net_id, ipin_idx); @@ -171,7 +213,9 @@ void QPHybridSolver::init_linear_system() { std::swap(first_blk_id, second_blk_id); } size_t first_row_id = (size_t)blk_id_to_row_id_[first_blk_id]; - add_connection_to_system(first_row_id, second_blk_id, w); + add_connection_to_system(first_row_id, second_blk_id, w, tripletList, + b_x, b_y, A_sparse, blk_id_to_row_id_, + netlist_); } } } @@ -194,8 +238,19 @@ void QPHybridSolver::init_linear_system() { * b[i] = b[i] + pos[block(i)] * coeff_pseudo_anchor; * Where coeff_pseudo_anchor grows with each iteration. * - * This is basically a fast way of adding a connection between a moveable block - * and a fixed block. 
+ * This is basically a fast way of adding a connection between all moveable + * blocks in the netlist and their target fixed placement location. + * + * See add_connection_to_system. + * + * @param A_sparse_diff The coefficient matrix to update. + * @param b_x_diff The x-dimension constant vector to update. + * @param b_y_diff The y-dimension constant vector to update. + * @param p_placement The location the moveable blocks should be anchored + * to. + * @param num_moveable_blocks The number of moveable blocks in the netlist. + * @param row_id_to_blk_id Lookup for the APBlock Id from the row id. + * @param iteration The current iteration of the Global Placer. */ static inline void update_linear_system_with_anchors( Eigen::SparseMatrix &A_sparse_diff, @@ -252,9 +307,14 @@ void QPHybridSolver::solve(unsigned iteration, PartialPlacement &p_placement) { VTR_ASSERT(cg.info() == Eigen::Success && "Conjugate Gradient failed at solving b_y!"); // Write the results back into the partial placement object. + // NOTE: The first [0, num_moveable_blocks_) rows always represent the + // moveable APBlocks. The star nodes always come after and are ignored + // in the solution.
for (size_t row_id_idx = 0; row_id_idx < num_moveable_blocks_; row_id_idx++) { APRowId row_id = APRowId(row_id_idx); APBlockId blk_id = row_id_to_blk_id_[row_id]; + VTR_ASSERT_DEBUG(blk_id.is_valid()); + VTR_ASSERT_DEBUG(netlist_.block_mobility(blk_id) == APBlockMobility::MOVEABLE); p_placement.block_x_locs[blk_id] = x[row_id_idx]; p_placement.block_y_locs[blk_id] = y[row_id_idx]; } diff --git a/vpr/src/analytical_place/analytical_solver.h b/vpr/src/analytical_place/analytical_solver.h index 7cb37bec6c8..a86eae6d073 100644 --- a/vpr/src/analytical_place/analytical_solver.h +++ b/vpr/src/analytical_place/analytical_solver.h @@ -55,7 +55,7 @@ class AnalyticalSolver { /** * @brief Constructor of the base AnalyticalSolver class * - * Initializes the internal data members of the base class which are usefull + * Initializes the internal data members of the base class which are useful * for all solvers. */ AnalyticalSolver(const APNetlist &netlist); @@ -148,7 +148,7 @@ class QPHybridSolver : public AnalyticalSolver { static constexpr size_t star_num_pins_threshold = 3; /** - * @brief Initializes the linear system of Ax = b_x and Ax = b_y based on + * @brief Initializes the linear system of Ax = b_x and Ay = b_y based on * the APNetlist and the fixed APBlock locations. * * This is the "ideal" quadratic linear system where no anchor-points are @@ -180,7 +180,7 @@ class QPHybridSolver : public AnalyticalSolver { * * Initializes internal data and constructs the initial linear system. */ - QPHybridSolver(const APNetlist& inetlist) : AnalyticalSolver(inetlist) { + QPHybridSolver(const APNetlist& netlist) : AnalyticalSolver(netlist) { // Initializing the linear system only depends on the netlist and fixed // block locations. Both are provided by the netlist, allowing this to // be initialized in the constructor.