Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Packer] Created GreedyClusterer Class #2816

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion vpr/src/analytical_place/full_legalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "ShowSetup.h"
#include "ap_netlist_fwd.h"
#include "check_netlist.h"
#include "cluster.h"
#include "cluster_legalizer.h"
#include "cluster_util.h"
#include "clustered_netlist.h"
Expand Down
1 change: 0 additions & 1 deletion vpr/src/base/vpr_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@
#include "check_route.h"
#include "constant_nets.h"
#include "atom_netlist_utils.h"
#include "cluster.h"
#include "output_clustering.h"
#include "vpr_constraints_reader.h"
#include "place_constraints.h"
Expand Down
32 changes: 0 additions & 32 deletions vpr/src/pack/cluster.h

This file was deleted.

26 changes: 26 additions & 0 deletions vpr/src/pack/cluster_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1879,6 +1879,32 @@ void print_pb_type_count_recurr(t_pb_type* pb_type, size_t max_name_chars, size_
}
}

void print_pb_type_count(const ClusteredNetlist& clb_nlist) {
auto& device_ctx = g_vpr_ctx.device();

std::map<t_pb_type*, int> pb_type_count;

size_t max_depth = 0;
for (ClusterBlockId blk : clb_nlist.blocks()) {
size_t pb_max_depth = update_pb_type_count(clb_nlist.block_pb(blk), pb_type_count, 0);

max_depth = std::max(max_depth, pb_max_depth);
}

size_t max_pb_type_name_chars = 0;
for (auto& pb_type : pb_type_count) {
max_pb_type_name_chars = std::max(max_pb_type_name_chars, strlen(pb_type.first->name));
}

VTR_LOG("\nPb types usage...\n");
for (const auto& logical_block_type : device_ctx.logical_block_types) {
if (!logical_block_type.pb_type) continue;

print_pb_type_count_recurr(logical_block_type.pb_type, max_pb_type_name_chars + max_depth, 0, pb_type_count);
}
VTR_LOG("\n");
}

t_logical_block_type_ptr identify_logic_block_type(std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
std::string lut_name = ".names";

Expand Down
5 changes: 5 additions & 0 deletions vpr/src/pack/cluster_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,11 @@ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_

void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map<t_pb_type*, int>& pb_type_count);

/**
 * @brief Print the total number of used physical blocks for each pb type in
 *        the architecture.
 *
 * @param clb_nlist
 *              The clustered netlist whose cluster blocks are counted.
 */
void print_pb_type_count(const ClusteredNetlist& clb_nlist);

/*
* @brief This function identifies the logic block type which is defined by the
* block type which has a lut primitive.
Expand Down
180 changes: 72 additions & 108 deletions vpr/src/pack/cluster.cpp → vpr/src/pack/greedy_clusterer.cpp

Large diffs are not rendered by default.

163 changes: 163 additions & 0 deletions vpr/src/pack/greedy_clusterer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/**
* @file
* @author Alex Singer
* @date November 2024
* @brief The declarations of the Greedy Clusterer class which is used to
* encapsulate the process of greedy clustering.
*/

#pragma once

#include <map>
#include <unordered_set>
#include "physical_types.h"

// Forward declarations
class AtomNetId;
class AtomNetlist;
class AttractionInfo;
class ClusterLegalizer;
class Prepacker;
struct t_analysis_opts;
struct t_clustering_data;
struct t_pack_high_fanout_thresholds;
struct t_packer_opts;

/**
 * @brief A clusterer that generates clusters by greedily choosing the clusters
 *        which appear to have the best gain for a given neighbor.
 *
 * This clusterer generates one cluster at a time by finding candidate molecules
 * and selecting the molecule with the highest gain.
 *
 * The clusterer takes an Atom Netlist which has been pre-packed into pack
 * patterns (e.g. carry chains) as input and produces a set of legal clusters
 * of these pack molecules as output. Legality here means that it was able to
 * find a valid intra-lb route for the inputs of the clusters, through the
 * internal molecules, and to the outputs of the clusters.
 *
 * NOTE: This class stores references (and, for the architecture, a raw
 *       pointer) rather than copies of its configuration data, so it does not
 *       own that data. Presumably these members are bound to the constructor
 *       arguments, in which case those arguments must outlive the clusterer
 *       (confirm against the constructor definition).
 */
class GreedyClusterer {
  public:
    /**
     * @brief Constructor of the Greedy Clusterer class.
     *
     * The clusterer may be invoked many times during the packing flow. This
     * constructor will pre-compute information before clustering which can
     * improve the performance of the clusterer.
     *
     *  @param packer_opts
     *              Options passed by the user to configure the packing and
     *              clustering algorithms.
     *  @param analysis_opts
     *              Options passed by the user to configure timing analysis in
     *              the clusterer.
     *  @param atom_netlist
     *              The atom netlist to cluster over.
     *  @param arch
     *              The architecture to cluster over.
     *  @param high_fanout_thresholds
     *              The thresholds for what to consider as a high-fanout net
     *              for each logical block type. The clusterer will not consider
     *              nets with fanout higher than this to be important in
     *              candidate block selection (gain computation).
     *              A reason for it being per block type is that some blocks,
     *              like RAMs, have weak gains to other RAM primitives due to
     *              fairly high fanout address nets, so a higher fanout
     *              threshold for them is useful in generating a more dense
     *              packing.
     *  @param is_clock
     *              The set of clock nets in the Atom Netlist.
     *  @param is_global
     *              The set of global nets in the Atom Netlist. These will be
     *              routed on special dedicated networks, and hence are less
     *              relevant to locality / attraction.
     */
    GreedyClusterer(const t_packer_opts& packer_opts,
                    const t_analysis_opts& analysis_opts,
                    const AtomNetlist& atom_netlist,
                    const t_arch* arch,
                    const t_pack_high_fanout_thresholds& high_fanout_thresholds,
                    const std::unordered_set<AtomNetId>& is_clock,
                    const std::unordered_set<AtomNetId>& is_global);

    /**
     * @brief Performs clustering on the pack molecules formed by the prepacker.
     *
     * The clustering is contained within the Cluster Legalizer.
     *
     *  @param cluster_legalizer
     *              The cluster legalizer which is used to create clusters and
     *              grow clusters by adding molecules to a cluster.
     *  @param prepacker
     *              The prepacker object which contains the pack molecules that
     *              are atoms which are pre-packed before the main clustering
     *              (due to pack patterns, e.g. carry chains).
     *  @param allow_unrelated_clustering
     *              Allows primitives which have no attraction to the given
     *              cluster to be packed into it. This can lead to a denser
     *              packing, but tends to be bad for wirelength and timing.
     *  @param balance_block_type_utilization
     *              When true, tries to create clusters that balance the logical
     *              block type utilization. This is useful when some primitives
     *              have multiple logical block types to which they can cluster,
     *              e.g. multiple sizes of physical RAMs exist on the chip.
     *  @param attraction_groups
     *              Information on the attraction groups used during the
     *              clustering process. These are groups of primitives that have
     *              extra attraction to each other; currently they are used to
     *              guide the clusterer when it must cluster some parts of a
     *              design densely due to user placement/floorplanning
     *              constraints. They are created if some floorplan regions are
     *              overfilled after a clustering attempt.
     *
     *  @return num_used_type_instances
     *              The number of used logical blocks of each type by the
     *              clustering. This information may be useful when detecting
     *              if the clustering can fit on the device.
     */
    std::map<t_logical_block_type_ptr, size_t>
    do_clustering(ClusterLegalizer& cluster_legalizer,
                  Prepacker& prepacker,
                  bool allow_unrelated_clustering,
                  bool balance_block_type_utilization,
                  AttractionInfo& attraction_groups);

  private:
    /*
     * When attraction groups are created, the purpose is to pack more densely by adding more molecules
     * from the cluster's attraction group to the cluster. In a normal flow, (when attraction groups are
     * not on), the cluster keeps being packed until the get_molecule routines return either a repeated
     * molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the
     * cluster until a nullptr is returned. So, the number of repeated molecules is changed from 1 to 500,
     * effectively making the clusterer pack a cluster until a nullptr is returned.
     */
    static constexpr int attraction_groups_max_repeated_molecules_ = 500;

    /// @brief The packer options used to configure the clusterer.
    const t_packer_opts& packer_opts_;

    /// @brief The analysis options used to configure timing analysis within the
    ///        clusterer.
    const t_analysis_opts& analysis_opts_;

    /// @brief The atom netlist to cluster over.
    const AtomNetlist& atom_netlist_;

    /// @brief The device architecture to cluster onto.
    const t_arch* arch_ = nullptr;

    /// @brief The high-fanout thresholds per logical block type. Used to ignore
    ///        certain nets when calculating the gain for the next candidate
    ///        molecule to cluster.
    const t_pack_high_fanout_thresholds& high_fanout_thresholds_;

    /// @brief A set of atom nets which are considered as clocks.
    const std::unordered_set<AtomNetId>& is_clock_;

    /// @brief A set of atom nets which are considered as global nets.
    const std::unordered_set<AtomNetId>& is_global_;

    /// @brief Pre-computed logical block types for each model in the architecture.
    std::map<const t_model*, std::vector<t_logical_block_type_ptr>> primitive_candidate_block_types_;
};

39 changes: 17 additions & 22 deletions vpr/src/pack/pack.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include <unordered_set>

#include "SetupGrid.h"
#include "cluster.h"
#include "cluster_legalizer.h"
#include "cluster_util.h"
#include "constraints_report.h"
#include "globals.h"
#include "greedy_clusterer.h"
#include "pack.h"
#include "prepack.h"
#include "vpr_context.h"
Expand All @@ -29,7 +30,6 @@ bool try_pack(t_packer_opts* packer_opts,
const DeviceContext& device_ctx = g_vpr_ctx.device();

std::unordered_set<AtomNetId> is_clock, is_global;
t_clustering_data clustering_data;
VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str());

is_clock = alloc_and_load_is_clock();
Expand Down Expand Up @@ -91,7 +91,6 @@ bool try_pack(t_packer_opts* packer_opts,
}

int pack_iteration = 1;
bool floorplan_regions_overfull = false;

// Initialize the cluster legalizer.
ClusterLegalizer cluster_legalizer(atom_ctx.nlist,
Expand All @@ -110,27 +109,25 @@ bool try_pack(t_packer_opts* packer_opts,
VTR_LOG("Packing with pin utilization targets: %s\n", cluster_legalizer.get_target_external_pin_util().to_string().c_str());
VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds.to_string().c_str());

while (true) {
free_clustering_data(*packer_opts, clustering_data);

// Initialize the greedy clusterer.
GreedyClusterer clusterer(*packer_opts,
*analysis_opts,
atom_ctx.nlist,
arch,
high_fanout_thresholds,
is_clock,
is_global);

while (true) {
//Cluster the netlist
// num_used_type_instances: A map used to save the number of used
// instances from each logical block type.
std::map<t_logical_block_type_ptr, size_t> num_used_type_instances;
num_used_type_instances = do_clustering(*packer_opts,
*analysis_opts,
arch,
prepacker,
cluster_legalizer,
is_clock,
is_global,
allow_unrelated_clustering,
balance_block_type_util,
attraction_groups,
floorplan_regions_overfull,
high_fanout_thresholds,
clustering_data);
num_used_type_instances = clusterer.do_clustering(cluster_legalizer,
prepacker,
allow_unrelated_clustering,
balance_block_type_util,
attraction_groups);

//Try to size/find a device
bool fits_on_device = try_size_device_grid(*arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout);
Expand All @@ -139,6 +136,7 @@ bool try_pack(t_packer_opts* packer_opts,
* is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause
* of the floorplan not fitting, so attraction groups are turned on for later iterations.
*/
bool floorplan_regions_overfull = floorplan_constraints_regions_overfull(cluster_legalizer);
bool floorplan_not_fitting = (floorplan_regions_overfull || g_vpr_ctx.floorplanning().constraints.get_num_partitions() > 0);

if (fits_on_device && !floorplan_regions_overfull) {
Expand Down Expand Up @@ -261,9 +259,6 @@ bool try_pack(t_packer_opts* packer_opts,
//check clustering and output it
check_and_output_clustering(cluster_legalizer, *packer_opts, is_clock, arch);

// Free Data Structures
free_clustering_data(*packer_opts, clustering_data);

VTR_LOG("\n");
VTR_LOG("Netlist conversion complete.\n");
VTR_LOG("\n");
Expand Down