Skip to content

Commit

Permalink
Merge pull request #2816 from AlexandreSinger/feature-greedy-clustere…
Browse files Browse the repository at this point in the history
…r-class

[Packer] Created GreedyClusterer Class
  • Loading branch information
vaughnbetz authored Nov 22, 2024
2 parents 3fa3148 + 2232cd4 commit 33c131a
Show file tree
Hide file tree
Showing 8 changed files with 283 additions and 164 deletions.
1 change: 0 additions & 1 deletion vpr/src/analytical_place/full_legalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "ShowSetup.h"
#include "ap_netlist_fwd.h"
#include "check_netlist.h"
#include "cluster.h"
#include "cluster_legalizer.h"
#include "cluster_util.h"
#include "clustered_netlist.h"
Expand Down
1 change: 0 additions & 1 deletion vpr/src/base/vpr_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@
#include "check_route.h"
#include "constant_nets.h"
#include "atom_netlist_utils.h"
#include "cluster.h"
#include "output_clustering.h"
#include "vpr_constraints_reader.h"
#include "place_constraints.h"
Expand Down
32 changes: 0 additions & 32 deletions vpr/src/pack/cluster.h

This file was deleted.

26 changes: 26 additions & 0 deletions vpr/src/pack/cluster_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1879,6 +1879,32 @@ void print_pb_type_count_recurr(t_pb_type* pb_type, size_t max_name_chars, size_
}
}

void print_pb_type_count(const ClusteredNetlist& clb_nlist) {
auto& device_ctx = g_vpr_ctx.device();

std::map<t_pb_type*, int> pb_type_count;

size_t max_depth = 0;
for (ClusterBlockId blk : clb_nlist.blocks()) {
size_t pb_max_depth = update_pb_type_count(clb_nlist.block_pb(blk), pb_type_count, 0);

max_depth = std::max(max_depth, pb_max_depth);
}

size_t max_pb_type_name_chars = 0;
for (auto& pb_type : pb_type_count) {
max_pb_type_name_chars = std::max(max_pb_type_name_chars, strlen(pb_type.first->name));
}

VTR_LOG("\nPb types usage...\n");
for (const auto& logical_block_type : device_ctx.logical_block_types) {
if (!logical_block_type.pb_type) continue;

print_pb_type_count_recurr(logical_block_type.pb_type, max_pb_type_name_chars + max_depth, 0, pb_type_count);
}
VTR_LOG("\n");
}

t_logical_block_type_ptr identify_logic_block_type(std::map<const t_model*, std::vector<t_logical_block_type_ptr>>& primitive_candidate_block_types) {
std::string lut_name = ".names";

Expand Down
5 changes: 5 additions & 0 deletions vpr/src/pack/cluster_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,11 @@ void update_le_count(const t_pb* pb, const t_logical_block_type_ptr logic_block_

void print_pb_type_count_recurr(t_pb_type* type, size_t max_name_chars, size_t curr_depth, std::map<t_pb_type*, int>& pb_type_count);

/**
 * @brief Print the total number of used physical blocks for each pb type in
 *        the architecture.
 *
 * @param clb_nlist
 *              The clustered netlist whose blocks are counted.
 */
void print_pb_type_count(const ClusteredNetlist& clb_nlist);

/*
* @brief This function identifies the logic block type which is defined by the
* block type which has a lut primitive.
Expand Down
180 changes: 72 additions & 108 deletions vpr/src/pack/cluster.cpp → vpr/src/pack/greedy_clusterer.cpp

Large diffs are not rendered by default.

163 changes: 163 additions & 0 deletions vpr/src/pack/greedy_clusterer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/**
* @file
* @author Alex Singer
* @date November 2024
* @brief The declarations of the Greedy Clusterer class which is used to
* encapsulate the process of greedy clustering.
*/

#pragma once

#include <map>
#include <unordered_set>
#include "physical_types.h"

// Forward declarations
class AtomNetId;
class AtomNetlist;
class AttractionInfo;
class ClusterLegalizer;
class Prepacker;
struct t_analysis_opts;
struct t_clustering_data;
struct t_pack_high_fanout_thresholds;
struct t_packer_opts;

/**
* @brief A clusterer that generates clusters by greedily choosing the clusters
* which appear to have the best gain for a given neighbor.
*
* This clusterer generates one cluster at a time by finding candidate molecules
* and selecting the molecule with the highest gain.
*
* The clusterer takes an Atom Netlist which has been pre-packed into pack
* patterns (e.g. carry chains) as input and produces a set of legal clusters
* of these pack molecules as output. Legality here means that it was able to
* find a valid intra-lb route for the inputs of the clusters, through the
* internal molecules, and to the outputs of the clusters.
*
* NOTE(review): most of the configuration is held through reference members
* (see the private section), so the objects handed to the constructor
* presumably need to outlive this object; confirm against the constructor
* definition.
*/
class GreedyClusterer {
public:
/**
* @brief Constructor of the Greedy Clusterer class.
*
* The clusterer may be invoked many times during the packing flow. This
* constructor will pre-compute information before clustering which can
* improve the performance of the clusterer.
*
* @param packer_opts
* Options passed by the user to configure the packing and
* clustering algorithms.
* @param analysis_opts
* Options passed by the user to configure timing analysis in
* the clusterer.
* @param atom_netlist
* The atom netlist to cluster over.
* @param arch
* The architecture to cluster over.
* @param high_fanout_thresholds
* The thresholds for what to consider as a high-fanout net
* for each logical block type. The clusterer will not consider
* nets with fanout higher than this to be important in
* candidate block selection (gain computation).
* A reason for it being per block type is that some blocks,
* like RAMs, have weak gains to other RAM primitives due to
* fairly high fanout address nets, so a higher fanout
* threshold for them is useful in generating a more dense
* packing.
* @param is_clock
* The set of clock nets in the Atom Netlist.
* @param is_global
* The set of global nets in the Atom Netlist. These will be
* routed on special dedicated networks, and hence are less
* relevant to locality / attraction.
*/
GreedyClusterer(const t_packer_opts& packer_opts,
const t_analysis_opts& analysis_opts,
const AtomNetlist& atom_netlist,
const t_arch* arch,
const t_pack_high_fanout_thresholds& high_fanout_thresholds,
const std::unordered_set<AtomNetId>& is_clock,
const std::unordered_set<AtomNetId>& is_global);

/**
* @brief Performs clustering on the pack molecules formed by the prepacker.
*
* The clustering is contained within the Cluster Legalizer.
*
* @param cluster_legalizer
* The cluster legalizer which is used to create clusters and
* grow clusters by adding molecules to a cluster.
* @param prepacker
* The prepacker object which contains the pack molecules that
* are atoms which are pre-packed before the main clustering
* (due to pack patterns, e.g. carry chains).
* @param allow_unrelated_clustering
* Allows primitives which have no attraction to the given
* cluster to be packed into it. This can lead to a denser
* packing, but tends to be bad for wirelength and timing.
* @param balance_block_type_utilization
* When true, tries to create clusters that balance the logical
* block type utilization. This is useful when some primitives
* have multiple logical block types to which they can cluster,
* e.g. multiple sizes of physical RAMs exist on the chip.
* @param attraction_groups
* Information on the attraction groups used during the
* clustering process. These are groups of primitives that have
* extra attraction to each other; currently they are used to
* guide the clusterer when it must cluster some parts of a
* design densely due to user placement/floorplanning
* constraints. They are created if some floorplan regions are
* overfilled after a clustering attempt.
*
* @return num_used_type_instances
* The number of used logical blocks of each type by the
* clustering. This information may be useful when detecting
* if the clustering can fit on the device.
*/
std::map<t_logical_block_type_ptr, size_t>
do_clustering(ClusterLegalizer& cluster_legalizer,
Prepacker& prepacker,
bool allow_unrelated_clustering,
bool balance_block_type_utilization,
AttractionInfo& attraction_groups);

private:
/*
* The maximum number of repeated molecules tolerated while packing a cluster
* when attraction groups are on.
*
* When attraction groups are created, the purpose is to pack more densely by adding more molecules
* from the cluster's attraction group to the cluster. In a normal flow (when attraction groups are
* not on), the cluster keeps being packed until the get_molecule routines return either a repeated
* molecule or a nullptr. When attraction groups are on, we want to keep exploring molecules for the
* cluster until a nullptr is returned. So, the number of repeated molecules is changed from 1 to 500,
* effectively making the clusterer pack a cluster until a nullptr is returned.
*/
static constexpr int attraction_groups_max_repeated_molecules_ = 500;

/// @brief The packer options used to configure the clusterer.
///        Held by reference, not copied.
const t_packer_opts& packer_opts_;

/// @brief The analysis options used to configure timing analysis within the
/// clusterer. Held by reference, not copied.
const t_analysis_opts& analysis_opts_;

/// @brief The atom netlist to cluster over. Held by reference, not copied.
const AtomNetlist& atom_netlist_;

/// @brief The device architecture to cluster onto. Non-owning pointer which
///        defaults to nullptr.
const t_arch* arch_ = nullptr;

/// @brief The high-fanout thresholds per logical block type. Used to ignore
/// certain nets when calculating the gain for the next candidate
/// molecule to cluster. Held by reference, not copied.
const t_pack_high_fanout_thresholds& high_fanout_thresholds_;

/// @brief A set of atom nets which are considered as clocks.
///        Held by reference, not copied.
const std::unordered_set<AtomNetId>& is_clock_;

/// @brief A set of atom nets which are considered as global nets.
///        Held by reference, not copied.
const std::unordered_set<AtomNetId>& is_global_;

/// @brief Pre-computed logical block types for each model in the architecture.
///        Unlike the reference members above, this map is stored by value.
std::map<const t_model*, std::vector<t_logical_block_type_ptr>> primitive_candidate_block_types_;
};

39 changes: 17 additions & 22 deletions vpr/src/pack/pack.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include <unordered_set>

#include "SetupGrid.h"
#include "cluster.h"
#include "cluster_legalizer.h"
#include "cluster_util.h"
#include "constraints_report.h"
#include "globals.h"
#include "greedy_clusterer.h"
#include "pack.h"
#include "prepack.h"
#include "vpr_context.h"
Expand All @@ -29,7 +30,6 @@ bool try_pack(t_packer_opts* packer_opts,
const DeviceContext& device_ctx = g_vpr_ctx.device();

std::unordered_set<AtomNetId> is_clock, is_global;
t_clustering_data clustering_data;
VTR_LOG("Begin packing '%s'.\n", packer_opts->circuit_file_name.c_str());

is_clock = alloc_and_load_is_clock();
Expand Down Expand Up @@ -91,7 +91,6 @@ bool try_pack(t_packer_opts* packer_opts,
}

int pack_iteration = 1;
bool floorplan_regions_overfull = false;

// Initialize the cluster legalizer.
ClusterLegalizer cluster_legalizer(atom_ctx.nlist,
Expand All @@ -110,27 +109,25 @@ bool try_pack(t_packer_opts* packer_opts,
VTR_LOG("Packing with pin utilization targets: %s\n", cluster_legalizer.get_target_external_pin_util().to_string().c_str());
VTR_LOG("Packing with high fanout thresholds: %s\n", high_fanout_thresholds.to_string().c_str());

while (true) {
free_clustering_data(*packer_opts, clustering_data);

// Initialize the greedy clusterer.
GreedyClusterer clusterer(*packer_opts,
*analysis_opts,
atom_ctx.nlist,
arch,
high_fanout_thresholds,
is_clock,
is_global);

while (true) {
//Cluster the netlist
// num_used_type_instances: A map used to save the number of used
// instances from each logical block type.
std::map<t_logical_block_type_ptr, size_t> num_used_type_instances;
num_used_type_instances = do_clustering(*packer_opts,
*analysis_opts,
arch,
prepacker,
cluster_legalizer,
is_clock,
is_global,
allow_unrelated_clustering,
balance_block_type_util,
attraction_groups,
floorplan_regions_overfull,
high_fanout_thresholds,
clustering_data);
num_used_type_instances = clusterer.do_clustering(cluster_legalizer,
prepacker,
allow_unrelated_clustering,
balance_block_type_util,
attraction_groups);

//Try to size/find a device
bool fits_on_device = try_size_device_grid(*arch, num_used_type_instances, packer_opts->target_device_utilization, packer_opts->device_layout);
Expand All @@ -139,6 +136,7 @@ bool try_pack(t_packer_opts* packer_opts,
* is not dense enough and there are floorplan constraints, it is presumed that the constraints are the cause
* of the floorplan not fitting, so attraction groups are turned on for later iterations.
*/
bool floorplan_regions_overfull = floorplan_constraints_regions_overfull(cluster_legalizer);
bool floorplan_not_fitting = (floorplan_regions_overfull || g_vpr_ctx.floorplanning().constraints.get_num_partitions() > 0);

if (fits_on_device && !floorplan_regions_overfull) {
Expand Down Expand Up @@ -261,9 +259,6 @@ bool try_pack(t_packer_opts* packer_opts,
//check clustering and output it
check_and_output_clustering(cluster_legalizer, *packer_opts, is_clock, arch);

// Free Data Structures
free_clustering_data(*packer_opts, clustering_data);

VTR_LOG("\n");
VTR_LOG("Netlist conversion complete.\n");
VTR_LOG("\n");
Expand Down

0 comments on commit 33c131a

Please sign in to comment.