diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.cpp b/vpr/src/place/timing/delay_model/delta_delay_model.cpp new file mode 100644 index 0000000000..55bb010431 --- /dev/null +++ b/vpr/src/place/timing/delay_model/delta_delay_model.cpp @@ -0,0 +1,34 @@ + +#include "delta_delay_model.h" + +float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, + const t_physical_tile_loc& to_loc, int /*to_pin*/) const { + int delta_x = std::abs(from_loc.x - to_loc.x); + int delta_y = std::abs(from_loc.y - to_loc.y); + + return delays_[from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; +} + +void DeltaDelayModel::dump_echo(std::string filepath) const { + FILE* f = vtr::fopen(filepath.c_str(), "w"); + fprintf(f, " "); + for (size_t from_layer_num = 0; from_layer_num < delays_.dim_size(0); ++from_layer_num) { + for (size_t to_layer_num = 0; to_layer_num < delays_.dim_size(1); ++to_layer_num) { + fprintf(f, " %9zu", from_layer_num); + fprintf(f, "\n"); + for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { + fprintf(f, " %9zu", dx); + } + fprintf(f, "\n"); + for (size_t dy = 0; dy < delays_.dim_size(3); ++dy) { + fprintf(f, "%9zu", dy); + for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { + fprintf(f, " %9.2e", delays_[from_layer_num][to_layer_num][dx][dy]); + } + fprintf(f, "\n"); + } + } + } + vtr::fclose(f); +} + diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.h b/vpr/src/place/timing/delay_model/delta_delay_model.h new file mode 100644 index 0000000000..c3ae0d83cf --- /dev/null +++ b/vpr/src/place/timing/delay_model/delta_delay_model.h @@ -0,0 +1,47 @@ + +#pragma once + +#include "place_delay_model.h" + +/** + * @class DeltaDelayModel + * + * @brief A simple delay model based on the distance (delta) between block locations. 
+ */ +class DeltaDelayModel : public PlaceDelayModel { + public: + DeltaDelayModel(float min_cross_layer_delay, + bool is_flat) + : cross_layer_delay_(min_cross_layer_delay) + , is_flat_(is_flat) {} + + DeltaDelayModel(float min_cross_layer_delay, + vtr::NdMatrix delta_delays, + bool is_flat) + : delays_(std::move(delta_delays)) + , cross_layer_delay_(min_cross_layer_delay) + , is_flat_(is_flat) {} + + void compute(RouterDelayProfiler& router, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) override; + + float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; + + void dump_echo(std::string filepath) const override; + + void read(const std::string& file) override; + void write(const std::string& file) const override; + + const vtr::NdMatrix& delays() const { + return delays_; + } + + private: + vtr::NdMatrix delays_; // [0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy] + float cross_layer_delay_; + + /// Indicates whether the router is a two-stage or run-flat + bool is_flat_; +}; \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/override_delay_model.cpp b/vpr/src/place/timing/delay_model/override_delay_model.cpp new file mode 100644 index 0000000000..ceb8245511 --- /dev/null +++ b/vpr/src/place/timing/delay_model/override_delay_model.cpp @@ -0,0 +1,262 @@ + +#include "override_delay_model.h" + +#ifdef VTR_ENABLE_CAPNPROTO +# include "capnp/serialize.h" +# include "place_delay_model.capnp.h" +# include "ndmatrix_serdes.h" +# include "mmap_file.h" +# include "serdes_utils.h" +#endif // VTR_ENABLE_CAPNPROTO + +const DeltaDelayModel* OverrideDelayModel::base_delay_model() const { + return base_delay_model_.get(); +} + +float OverrideDelayModel::delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const { + // First check if there is an override delay value + const auto& device_ctx 
= g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; + + t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_loc); + t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_loc); + + t_override override_key; + override_key.from_type = from_type_ptr->index; + override_key.from_class = from_type_ptr->pin_class[from_pin]; + override_key.to_type = to_type_ptr->index; + override_key.to_class = to_type_ptr->pin_class[to_pin]; + + //Delay overrides may be different for +/- delta so do not use + //an absolute delta for the look-up + override_key.delta_x = to_loc.x - from_loc.x; + override_key.delta_y = to_loc.y - from_loc.y; + + float delay_val = std::numeric_limits::quiet_NaN(); + auto override_iter = delay_overrides_.find(override_key); + if (override_iter != delay_overrides_.end()) { + //Found an override + delay_val = override_iter->second; + } else { + //Fall back to the base delay model if no override was found + delay_val = base_delay_model_->delay(from_loc, from_pin, to_loc, to_pin); + } + + return delay_val; +} + +void OverrideDelayModel::set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay_val) { + t_override override_key; + override_key.from_type = from_type; + override_key.from_class = from_class; + override_key.to_type = to_type; + override_key.to_class = to_class; + override_key.delta_x = delta_x; + override_key.delta_y = delta_y; + + auto res = delay_overrides_.insert(std::make_pair(override_key, delay_val)); + if (!res.second) { //Key already exists + res.first->second = delay_val; //Overwrite existing delay + } +} + +void OverrideDelayModel::dump_echo(std::string filepath) const { + base_delay_model_->dump_echo(filepath); + + FILE* f = vtr::fopen(filepath.c_str(), "a"); + + fprintf(f, "\n"); + fprintf(f, "# Delay Overrides\n"); + auto& device_ctx = g_vpr_ctx.device(); + for (auto kv : delay_overrides_) { + auto override_key = kv.first; + float delay_val = 
kv.second; + fprintf(f, "from_type: %s to_type: %s from_pin_class: %d to_pin_class: %d delta_x: %d delta_y: %d -> delay: %g\n", + device_ctx.physical_tile_types[override_key.from_type].name.c_str(), + device_ctx.physical_tile_types[override_key.to_type].name.c_str(), + override_key.from_class, + override_key.to_class, + override_key.delta_x, + override_key.delta_y, + delay_val); + } + + vtr::fclose(f); +} + +float OverrideDelayModel::get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const { + t_override key; + key.from_type = from_type; + key.from_class = from_class; + key.to_type = to_type; + key.to_class = to_class; + key.delta_x = delta_x; + key.delta_y = delta_y; + + auto iter = delay_overrides_.find(key); + if (iter == delay_overrides_.end()) { + VPR_THROW(VPR_ERROR_PLACE, "Key not found."); + } + return iter->second; +} + +void OverrideDelayModel::set_base_delay_model(std::unique_ptr base_delay_model_obj) { + base_delay_model_ = std::move(base_delay_model_obj); +} + +/** + * When writing capnp targetted serialization, always allow compilation when + * VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead. + */ +#ifndef VTR_ENABLE_CAPNPROTO + +# define DISABLE_ERROR \ + "is disable because VTR_ENABLE_CAPNPROTO=OFF." \ + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable." 
+ +void DeltaDelayModel::read(const std::string& /*file*/) { + VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::read " DISABLE_ERROR); +} + +void DeltaDelayModel::write(const std::string& /*file*/) const { + VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::write " DISABLE_ERROR); +} + +void OverrideDelayModel::read(const std::string& /*file*/) { + VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::read " DISABLE_ERROR); +} + +void OverrideDelayModel::write(const std::string& /*file*/) const { + VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::write " DISABLE_ERROR); +} + +#else /* VTR_ENABLE_CAPNPROTO */ + +static void ToFloat(float* out, const VprFloatEntry::Reader& in) { + // Getting a scalar field is always "get()". + *out = in.getValue(); +} + +static void FromFloat(VprFloatEntry::Builder* out, const float& in) { + // Setting a scalar field is always "set(value)". + out->setValue(in); +} + +void DeltaDelayModel::read(const std::string& file) { + // MmapFile object creates an mmap of the specified path, and will munmap + // when the object leaves scope. + MmapFile f(file); + + /* Increase reader limit to 1G words to allow for large files. */ + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + + // FlatArrayMessageReader is used to read the message from the data array + // provided by MmapFile. + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + // When reading capnproto files the Reader object to use is named + // ::Reader. + // + // Initially this object is an empty VprDeltaDelayModel. + VprDeltaDelayModel::Reader model; + + // The reader.getRoot performs a cast from the generic capnproto to fit + // with the specified schema. + // + // Note that capnproto does not validate that the incoming data matches the + // schema. If this property is required, some form of check would be + // required. + model = reader.getRoot(); + + // ToNdMatrix is a generic function for converting a Matrix capnproto + // to a vtr::NdMatrix. 
+ // + // The user must supply the matrix dimension (4 in this case), the source + // capnproto type (VprFloatEntry), + // target C++ type (float), and a function to convert from the source capnproto + // type to the target C++ type (ToFloat). + // + // The second argument should be of type Matrix::Reader where X is the + // capnproto element type. + ToNdMatrix<4, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); +} + +void DeltaDelayModel::write(const std::string& file) const { + // MallocMessageBuilder object is the generic capnproto message builder, + // using malloc for buffer allocation. + ::capnp::MallocMessageBuilder builder; + + // initRoot returns a X::Builder object that can be used to set the + // fields in the message. + auto model = builder.initRoot(); + + // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a + // Matrix message. It is the mirror function of ToNdMatrix described in + // read above. + auto delay_values = model.getDelays(); + FromNdMatrix<4, VprFloatEntry, float>(&delay_values, delays_, FromFloat); + + // writeMessageToFile writes message to the specified file. + writeMessageToFile(file, &builder); +} + +void OverrideDelayModel::read(const std::string& file) { + MmapFile f(file); + + /* Increase reader limit to 1G words to allow for large files. */ + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + vtr::NdMatrix delays; + auto model = reader.getRoot(); + ToNdMatrix<4, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); + + base_delay_model_ = std::make_unique(cross_layer_delay_, delays, is_flat_); + + // Reading non-scalar capnproto fields is roughly equivalent to using + // a std::vector of the field type. Actual type is capnp::List::Reader. 
+ auto overrides = model.getDelayOverrides(); + std::vector > overrides_arr(overrides.size()); + for (size_t i = 0; i < overrides.size(); ++i) { + const auto& elem = overrides[i]; + overrides_arr[i].first.from_type = elem.getFromType(); + overrides_arr[i].first.to_type = elem.getToType(); + overrides_arr[i].first.from_class = elem.getFromClass(); + overrides_arr[i].first.to_class = elem.getToClass(); + overrides_arr[i].first.delta_x = elem.getDeltaX(); + overrides_arr[i].first.delta_y = elem.getDeltaY(); + + overrides_arr[i].second = elem.getDelay(); + } + + delay_overrides_ = vtr::make_flat_map2(std::move(overrides_arr)); +} + +void OverrideDelayModel::write(const std::string& file) const { + ::capnp::MallocMessageBuilder builder; + auto model = builder.initRoot(); + + auto delays = model.getDelays(); + FromNdMatrix<4, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); + + // Non-scalar capnproto fields should be first initialized with + // init(count), and then accessed from the returned + // std::vector-like Builder object (specifically capnp::List::Builder). 
+ auto overrides = model.initDelayOverrides(delay_overrides_.size()); + auto dst_iter = overrides.begin(); + for (const auto& src : delay_overrides_) { + auto elem = *dst_iter++; + elem.setFromType(src.first.from_type); + elem.setToType(src.first.to_type); + elem.setFromClass(src.first.from_class); + elem.setToClass(src.first.to_class); + elem.setDeltaX(src.first.delta_x); + elem.setDeltaY(src.first.delta_y); + + elem.setDelay(src.second); + } + + writeMessageToFile(file, &builder); +} + +#endif \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/override_delay_model.h b/vpr/src/place/timing/delay_model/override_delay_model.h new file mode 100644 index 0000000000..23f6d01d70 --- /dev/null +++ b/vpr/src/place/timing/delay_model/override_delay_model.h @@ -0,0 +1,112 @@ + +#pragma once + +#include "place_delay_model.h" +#include "delta_delay_model.h" + +class OverrideDelayModel : public PlaceDelayModel { + public: + OverrideDelayModel(float min_cross_layer_delay, + bool is_flat) + : cross_layer_delay_(min_cross_layer_delay) + , is_flat_(is_flat) {} + + void compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) override; + + /** + * @brief returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the + * specified from and to pins + */ + float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override; + + void dump_echo(std::string filepath) const override; + + void read(const std::string& file) override; + void write(const std::string& file) const override; + + public: //Mutators + void set_base_delay_model(std::unique_ptr base_delay_model); + const DeltaDelayModel* base_delay_model() const; + float get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const; + void set_delay_override(int from_type, int from_class, 
int to_type, int to_class, int delta_x, int delta_y, float delay); + + private: + std::unique_ptr base_delay_model_; + /// Minimum delay of cross-layer connections + float cross_layer_delay_; + + /// Indicates whether the router is a two-stage or run-flat + bool is_flat_; + + void compute_override_delay_model(RouterDelayProfiler& router, + const t_router_opts& router_opts); + + /** + * @brief Structure that allows delays to be queried from the delay model. + * + * Delay is calculated given the origin physical tile, the origin + * pin, the destination physical tile, and the destination pin. + * This structure encapsulates all of this information. + * + * @param from_type, to_type + * Physical tile index (for easy array access) + * @param from_class, to_class + * The class that the pins belong to. + * @param delta_x, delta_y + * The horizontal and vertical displacement + * between two physical tiles. + */ + struct t_override { + short from_type; + short to_type; + short from_class; + short to_class; + short delta_x; + short delta_y; + + /** + * @brief Comparison operator designed for performance. + * + * Operator< is important since t_override serves as the key into the + * map structure delay_overrides_. A default comparison operator would + * not be inlined by the compiler. + * + * A combination of ALWAYS_INLINE attribute and std::lexicographical_compare + * is required for operator< to be inlined by compiler. Proper inlining of + * the function reduces place time by around 5%. 
+ * + * For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225 + */ + friend ALWAYS_INLINE bool operator<(const t_override& lhs, const t_override& rhs) { + const short* left = reinterpret_cast(&lhs); + const short* right = reinterpret_cast(&rhs); + constexpr size_t NUM_T_OVERRIDE_MEMBERS = sizeof(t_override) / sizeof(short); + return std::lexicographical_compare(left, left + NUM_T_OVERRIDE_MEMBERS, right, right + NUM_T_OVERRIDE_MEMBERS); + } + }; + + /** + * @brief Map data structure that returns delay values according to + * specific delay model queries. + * + * Delay model queries are provided by the t_override structure, which + * encapsulates the information regarding the origin and the destination. + */ + vtr::flat_map2 delay_overrides_; + + /** + * operator< treats memory layout of t_override as an array of short. + * This requires all members of t_override are shorts and there is no + * padding between members of t_override. + */ + static_assert(sizeof(t_override) == sizeof(t_override::from_type) + sizeof(t_override::to_type) + sizeof(t_override::from_class) + sizeof(t_override::to_class) + sizeof(t_override::delta_x) + sizeof(t_override::delta_y), "Expect t_override to have a memory layout equivalent to an array of short (no padding)"); + static_assert(sizeof(t_override::from_type) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::to_type) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::from_class) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::to_class) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts"); +}; 
\ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/place_delay_model.cpp b/vpr/src/place/timing/delay_model/place_delay_model.cpp index 4f626a5817..a91547a7e5 100644 --- a/vpr/src/place/timing/delay_model/place_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/place_delay_model.cpp @@ -4,319 +4,16 @@ * routines related to the placer delay model. */ -#include #include "place_delay_model.h" + +#include + #include "globals.h" #include "router_lookahead_map.h" -#include "rr_graph2.h" - #include "timing_place_lookup.h" #include "placer_state.h" - -#include "vtr_log.h" -#include "vtr_math.h" #include "vpr_error.h" -#ifdef VTR_ENABLE_CAPNPROTO -# include "capnp/serialize.h" -# include "place_delay_model.capnp.h" -# include "ndmatrix_serdes.h" -# include "mmap_file.h" -# include "serdes_utils.h" -#endif /* VTR_ENABLE_CAPNPROTO */ - -///@brief DeltaDelayModel methods. -float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const { - int delta_x = std::abs(from_loc.x - to_loc.x); - int delta_y = std::abs(from_loc.y - to_loc.y); - - return delays_[from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; -} - -void DeltaDelayModel::dump_echo(std::string filepath) const { - FILE* f = vtr::fopen(filepath.c_str(), "w"); - fprintf(f, " "); - for (size_t from_layer_num = 0; from_layer_num < delays_.dim_size(0); ++from_layer_num) { - for (size_t to_layer_num = 0; to_layer_num < delays_.dim_size(1); ++to_layer_num) { - fprintf(f, " %9zu", from_layer_num); - fprintf(f, "\n"); - for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { - fprintf(f, " %9zu", dx); - } - fprintf(f, "\n"); - for (size_t dy = 0; dy < delays_.dim_size(3); ++dy) { - fprintf(f, "%9zu", dy); - for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { - fprintf(f, " %9.2e", delays_[from_layer_num][to_layer_num][dx][dy]); - } - fprintf(f, "\n"); - } - } - } - vtr::fclose(f); -} - -const DeltaDelayModel* 
OverrideDelayModel::base_delay_model() const { - return base_delay_model_.get(); -} - -///@brief OverrideDelayModel methods. -float OverrideDelayModel::delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const { - //First check to if there is an override delay value - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - - t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_loc); - t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_loc); - - t_override override_key; - override_key.from_type = from_type_ptr->index; - override_key.from_class = from_type_ptr->pin_class[from_pin]; - override_key.to_type = to_type_ptr->index; - override_key.to_class = to_type_ptr->pin_class[to_pin]; - - //Delay overrides may be different for +/- delta so do not use - //an absolute delta for the look-up - override_key.delta_x = to_loc.x - from_loc.x; - override_key.delta_y = to_loc.y - from_loc.y; - - float delay_val = std::numeric_limits::quiet_NaN(); - auto override_iter = delay_overrides_.find(override_key); - if (override_iter != delay_overrides_.end()) { - //Found an override - delay_val = override_iter->second; - } else { - //Fall back to the base delay model if no override was found - delay_val = base_delay_model_->delay(from_loc, from_pin, to_loc, to_pin); - } - - return delay_val; -} - -void OverrideDelayModel::set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay_val) { - t_override override_key; - override_key.from_type = from_type; - override_key.from_class = from_class; - override_key.to_type = to_type; - override_key.to_class = to_class; - override_key.delta_x = delta_x; - override_key.delta_y = delta_y; - - auto res = delay_overrides_.insert(std::make_pair(override_key, delay_val)); - if (!res.second) { //Key already exists - res.first->second = delay_val; //Overwrite existing delay - } -} - -void 
OverrideDelayModel::dump_echo(std::string filepath) const { - base_delay_model_->dump_echo(filepath); - - FILE* f = vtr::fopen(filepath.c_str(), "a"); - - fprintf(f, "\n"); - fprintf(f, "# Delay Overrides\n"); - auto& device_ctx = g_vpr_ctx.device(); - for (auto kv : delay_overrides_) { - auto override_key = kv.first; - float delay_val = kv.second; - fprintf(f, "from_type: %s to_type: %s from_pin_class: %d to_pin_class: %d delta_x: %d delta_y: %d -> delay: %g\n", - device_ctx.physical_tile_types[override_key.from_type].name.c_str(), - device_ctx.physical_tile_types[override_key.to_type].name.c_str(), - override_key.from_class, - override_key.to_class, - override_key.delta_x, - override_key.delta_y, - delay_val); - } - - vtr::fclose(f); -} - -float OverrideDelayModel::get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const { - t_override key; - key.from_type = from_type; - key.from_class = from_class; - key.to_type = to_type; - key.to_class = to_class; - key.delta_x = delta_x; - key.delta_y = delta_y; - - auto iter = delay_overrides_.find(key); - if (iter == delay_overrides_.end()) { - VPR_THROW(VPR_ERROR_PLACE, "Key not found."); - } - return iter->second; -} - -void OverrideDelayModel::set_base_delay_model(std::unique_ptr base_delay_model_obj) { - base_delay_model_ = std::move(base_delay_model_obj); -} - -float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const { - int delta_x = std::abs(from_loc.x - to_loc.x); - int delta_y = std::abs(from_loc.y - to_loc.y); - - int from_tile_idx = g_vpr_ctx.device().grid.get_physical_type(from_loc)->index; - return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; -} - -/** - * When writing capnp targetted serialization, always allow compilation when - * VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead. 
- */ -#ifndef VTR_ENABLE_CAPNPROTO - -# define DISABLE_ERROR \ - "is disable because VTR_ENABLE_CAPNPROTO=OFF." \ - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable." - -void DeltaDelayModel::read(const std::string& /*file*/) { - VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::read " DISABLE_ERROR); -} - -void DeltaDelayModel::write(const std::string& /*file*/) const { - VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::write " DISABLE_ERROR); -} - -void OverrideDelayModel::read(const std::string& /*file*/) { - VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::read " DISABLE_ERROR); -} - -void OverrideDelayModel::write(const std::string& /*file*/) const { - VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::write " DISABLE_ERROR); -} - -#else /* VTR_ENABLE_CAPNPROTO */ - -static void ToFloat(float* out, const VprFloatEntry::Reader& in) { - // Getting a scalar field is always "get()". - *out = in.getValue(); -} - -static void FromFloat(VprFloatEntry::Builder* out, const float& in) { - // Setting a scalar field is always "set(value)". - out->setValue(in); -} - -void DeltaDelayModel::read(const std::string& file) { - // MmapFile object creates an mmap of the specified path, and will munmap - // when the object leaves scope. - MmapFile f(file); - - /* Increase reader limit to 1G words to allow for large files. */ - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - - // FlatArrayMessageReader is used to read the message from the data array - // provided by MmapFile. - ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - // When reading capnproto files the Reader object to use is named - // ::Reader. - // - // Initially this object is an empty VprDeltaDelayModel. - VprDeltaDelayModel::Reader model; - - // The reader.getRoot performs a cast from the generic capnproto to fit - // with the specified schema. - // - // Note that capnproto does not validate that the incoming data matches the - // schema. 
If this property is required, some form of check would be - // required. - model = reader.getRoot(); - - // ToNdMatrix is a generic function for converting a Matrix capnproto - // to a vtr::NdMatrix. - // - // The use must supply the matrix dimension (2 in this case), the source - // capnproto type (VprFloatEntry), - // target C++ type (flat), and a function to convert from the source capnproto - // type to the target C++ type (ToFloat). - // - // The second argument should be of type Matrix::Reader where X is the - // capnproto element type. - ToNdMatrix<4, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); -} - -void DeltaDelayModel::write(const std::string& file) const { - // MallocMessageBuilder object is the generate capnproto message builder, - // using malloc for buffer allocation. - ::capnp::MallocMessageBuilder builder; - - // initRoot returns a X::Builder object that can be used to set the - // fields in the message. - auto model = builder.initRoot(); - - // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a - // Matrix message. It is the mirror function of ToNdMatrix described in - // read above. - auto delay_values = model.getDelays(); - FromNdMatrix<4, VprFloatEntry, float>(&delay_values, delays_, FromFloat); - - // writeMessageToFile writes message to the specified file. - writeMessageToFile(file, &builder); -} - -void OverrideDelayModel::read(const std::string& file) { - MmapFile f(file); - - /* Increase reader limit to 1G words to allow for large files. */ - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - vtr::NdMatrix delays; - auto model = reader.getRoot(); - ToNdMatrix<4, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); - - base_delay_model_ = std::make_unique(cross_layer_delay_, delays, is_flat_); - - // Reading non-scalar capnproto fields is roughly equivilant to using - // a std::vector of the field type. 
Actual type is capnp::List::Reader. - auto overrides = model.getDelayOverrides(); - std::vector > overrides_arr(overrides.size()); - for (size_t i = 0; i < overrides.size(); ++i) { - const auto& elem = overrides[i]; - overrides_arr[i].first.from_type = elem.getFromType(); - overrides_arr[i].first.to_type = elem.getToType(); - overrides_arr[i].first.from_class = elem.getFromClass(); - overrides_arr[i].first.to_class = elem.getToClass(); - overrides_arr[i].first.delta_x = elem.getDeltaX(); - overrides_arr[i].first.delta_y = elem.getDeltaY(); - - overrides_arr[i].second = elem.getDelay(); - } - - delay_overrides_ = vtr::make_flat_map2(std::move(overrides_arr)); -} - -void OverrideDelayModel::write(const std::string& file) const { - ::capnp::MallocMessageBuilder builder; - auto model = builder.initRoot(); - - auto delays = model.getDelays(); - FromNdMatrix<4, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); - - // Non-scalar capnproto fields should be first initialized with - // init(count), and then accessed from the returned - // std::vector-like Builder object (specifically capnp::List::Builder). - auto overrides = model.initDelayOverrides(delay_overrides_.size()); - auto dst_iter = overrides.begin(); - for (const auto& src : delay_overrides_) { - auto elem = *dst_iter++; - elem.setFromType(src.first.from_type); - elem.setToType(src.first.to_type); - elem.setFromClass(src.first.from_class); - elem.setToClass(src.first.to_class); - elem.setDeltaX(src.first.delta_x); - elem.setDeltaY(src.first.delta_y); - - elem.setDelay(src.second); - } - - writeMessageToFile(file, &builder); -} - -#endif - ///@brief Initialize the placer delay model. 
std::unique_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, t_chan_width_dist chan_width_dist, diff --git a/vpr/src/place/timing/delay_model/place_delay_model.h b/vpr/src/place/timing/delay_model/place_delay_model.h index 0aa01385e6..e361f8cc19 100644 --- a/vpr/src/place/timing/delay_model/place_delay_model.h +++ b/vpr/src/place/timing/delay_model/place_delay_model.h @@ -5,6 +5,7 @@ */ #pragma once + #include "vtr_ndmatrix.h" #include "vtr_flat_map.h" #include "vpr_types.h" @@ -54,11 +55,10 @@ class PlaceDelayModel { virtual ~PlaceDelayModel() = default; ///@brief Computes place delay model. - virtual void compute( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) + virtual void compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) = 0; /** @@ -86,175 +86,5 @@ class PlaceDelayModel { virtual void read(const std::string& file) = 0; }; -///@brief A simple delay model based on the distance (delta) between block locations. 
-class DeltaDelayModel : public PlaceDelayModel { - public: - DeltaDelayModel(float min_cross_layer_delay, - bool is_flat) - : cross_layer_delay_(min_cross_layer_delay) - , is_flat_(is_flat) {} - DeltaDelayModel(float min_cross_layer_delay, - vtr::NdMatrix delta_delays, - bool is_flat) - : delays_(std::move(delta_delays)) - , cross_layer_delay_(min_cross_layer_delay) - , is_flat_(is_flat) {} - - void compute( - RouterDelayProfiler& router, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) override; - float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; - void dump_echo(std::string filepath) const override; - - void read(const std::string& file) override; - void write(const std::string& file) const override; - const vtr::NdMatrix& delays() const { - return delays_; - } - - private: - vtr::NdMatrix delays_; // [0..num_layers-1][0..max_dx][0..max_dy] - float cross_layer_delay_; - /** - * @brief Indicates whether the router is a two-stage or run-flat - */ - bool is_flat_; -}; - -class OverrideDelayModel : public PlaceDelayModel { - public: - OverrideDelayModel(float min_cross_layer_delay, - bool is_flat) - : cross_layer_delay_(min_cross_layer_delay) - , is_flat_(is_flat) {} - void compute( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) override; - // returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the - // specified from and to pins - float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override; - void dump_echo(std::string filepath) const override; - - void read(const std::string& file) override; - void write(const std::string& file) const override; - - public: //Mutators - void set_base_delay_model(std::unique_ptr base_delay_model); - const 
DeltaDelayModel* base_delay_model() const; - float get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const; - void set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay); - - private: - std::unique_ptr base_delay_model_; - /** - * @brief Minimum delay of cross-layer connections - */ - float cross_layer_delay_; - /** - * @brief Indicates whether the router is a two-stage or run-flat - */ - bool is_flat_; - void compute_override_delay_model(RouterDelayProfiler& router, - const t_router_opts& router_opts); - /** - * @brief Structure that allows delays to be queried from the delay model. - * - * Delay is calculated given the origin physical tile, the origin - * pin, the destination physical tile, and the destination pin. - * This structure encapsulates all these information. - * - * @param from_type, to_type - * Physical tile index (for easy array access) - * @param from_class, to_class - * The class that the pins belongs to. - * @param to_x, to_y - * The horizontal and vertical displacement - * between two physical tiles. - */ - struct t_override { - short from_type; - short to_type; - short from_class; - short to_class; - short delta_x; - short delta_y; - - /** - * @brief Comparison operator designed for performance. - * - * Operator< is important since t_override serves as the key into the - * map structure delay_overrides_. A default comparison operator would - * not be inlined by the compiler. - * - * A combination of ALWAYS_INLINE attribute and std::lexicographical_compare - * is required for operator< to be inlined by compiler. Proper inlining of - * the function reduces place time by around 5%. 
- * - * For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225 - */ - friend ALWAYS_INLINE bool operator<(const t_override& lhs, const t_override& rhs) { - const short* left = reinterpret_cast(&lhs); - const short* right = reinterpret_cast(&rhs); - constexpr size_t NUM_T_OVERRIDE_MEMBERS = sizeof(t_override) / sizeof(short); - return std::lexicographical_compare(left, left + NUM_T_OVERRIDE_MEMBERS, right, right + NUM_T_OVERRIDE_MEMBERS); - } - }; - - /** - * @brief Map data structure that returns delay values according to - * specific delay model queries. - * - * Delay model queries are provided by the t_override structure, which - * encapsulates the information regarding the origin and the destination. - */ - vtr::flat_map2 delay_overrides_; - - /** - * operator< treats memory layout of t_override as an array of short. - * This requires all members of t_override are shorts and there is no - * padding between members of t_override. - */ - static_assert(sizeof(t_override) == sizeof(t_override::from_type) + sizeof(t_override::to_type) + sizeof(t_override::from_class) + sizeof(t_override::to_class) + sizeof(t_override::delta_x) + sizeof(t_override::delta_y), "Expect t_override to have a memory layout equivalent to an array of short (no padding)"); - static_assert(sizeof(t_override::from_type) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::to_type) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::from_class) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::to_class) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts"); -}; 
- -///@brief A simple delay model based on the information stored in router lookahead -/// This is in contrast to other placement delay models that get the cost of getting from one location to another by running the router -class SimpleDelayModel : public PlaceDelayModel { - public: - SimpleDelayModel() {} - - void compute( - RouterDelayProfiler& router, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) override; - float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; - void dump_echo(std::string /*filepath*/) const override {} - - void read(const std::string& /*file*/) override {} - void write(const std::string& /*file*/) const override {} - - private: - /** - * @brief The matrix to store the minimum delay between different points on different layers. - * - *The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers - *for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs - *on each layer. Additionally, instead of using d_layer, we distinguish between the destination layer to handle scenarios where connectivity between layers - *is not uniform. For example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1. - *One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular. 
- */ - vtr::NdMatrix delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy] -}; diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.cpp b/vpr/src/place/timing/delay_model/simple_delay_model.cpp new file mode 100644 index 0000000000..0031d9eb1f --- /dev/null +++ b/vpr/src/place/timing/delay_model/simple_delay_model.cpp @@ -0,0 +1,45 @@ + +#include "simple_delay_model.h" + + +void SimpleDelayModel::compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& /*placer_opts*/, + const t_router_opts& /*router_opts*/, + int /*longest_length*/) { + const auto& grid = g_vpr_ctx.device().grid; + const size_t num_physical_tile_types = g_vpr_ctx.device().physical_tile_types.size(); + const size_t num_layers = grid.get_num_layers(); + + // Initializing the delay matrix to [num_physical_types][num_layers][num_layers][width][height] + // The second index related to the layer that the source location is on and the third index is for the sink layer + delays_ = vtr::NdMatrix({num_physical_tile_types, + num_layers, + num_layers, + grid.width(), + grid.height()}); + + for (size_t physical_tile_type_idx = 0; physical_tile_type_idx < num_physical_tile_types; ++physical_tile_type_idx) { + for (size_t from_layer = 0; from_layer < num_layers; ++from_layer) { + for (size_t to_layer = 0; to_layer < num_layers; ++to_layer) { + for (size_t dx = 0; dx < grid.width(); ++dx) { + for (size_t dy = 0; dy < grid.height(); ++dy) { + float min_delay = route_profiler.get_min_delay(physical_tile_type_idx, + from_layer, + to_layer, + dx, + dy); + delays_[physical_tile_type_idx][from_layer][to_layer][dx][dy] = min_delay; + } + } + } + } + } +} + +float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const { + int delta_x = std::abs(from_loc.x - to_loc.x); + int delta_y = std::abs(from_loc.y - to_loc.y); + + int from_tile_idx = 
g_vpr_ctx.device().grid.get_physical_type(from_loc)->index; + return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; +} \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.h b/vpr/src/place/timing/delay_model/simple_delay_model.h new file mode 100644 index 0000000000..f5a856688c --- /dev/null +++ b/vpr/src/place/timing/delay_model/simple_delay_model.h @@ -0,0 +1,39 @@ + +#pragma once + +#include "place_delay_model.h" + +/** + * @class SimpleDelayModel + * @brief A simple delay model based on the information stored in router lookahead + * This is in contrast to other placement delay models that get the cost of getting from one location to another by running the router + */ +class SimpleDelayModel : public PlaceDelayModel { + public: + SimpleDelayModel() {} + + /// @brief Use the information in the router lookahead to fill the delay matrix instead of running the router + void compute(RouterDelayProfiler& router, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) override; + + float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; + + void dump_echo(std::string /*filepath*/) const override {} + + void read(const std::string& /*file*/) override {} + void write(const std::string& /*file*/) const override {} + + private: + /** + * @brief The matrix to store the minimum delay between different points on different layers. + * + *The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers + *for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs + *on each layer. 
Additionally, instead of using d_layer, we distinguish between the destination layer to handle scenarios where connectivity between layers + *is not uniform. For example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1. + *One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular. + */ + vtr::NdMatrix delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy] +}; \ No newline at end of file diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index fa6a9acb0b..76b06bbc55 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -25,6 +25,9 @@ #include "route_profiling.h" #include "router_delay_profiling.h" #include "place_delay_model.h" +#include "simple_delay_model.h" +#include "delta_delay_model.h" +#include "override_delay_model.h" /*To compute delay between blocks we calculate the delay between */ /*different nodes in the FPGA. 
From this procedure we generate @@ -123,13 +126,6 @@ static vtr::NdMatrix compute_delta_delay_model( int longest_length, bool is_flat); -/** - * @brief Use the information in the router lookahead to fill the delay matrix instead of running the router - * @param route_profiler - * @return The delay matrix that contain the minimum cost between two locations - */ -static vtr::NdMatrix compute_simple_delay_model(RouterDelayProfiler& route_profiler); - static bool find_direct_connect_sample_locations(const t_direct_inf* direct, t_physical_tile_type_ptr from_type, int from_pin, @@ -209,11 +205,10 @@ std::unique_ptr compute_place_delay_model(const t_placer_opts& return place_delay_model; } -void DeltaDelayModel::compute( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) { +void DeltaDelayModel::compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) { delays_ = compute_delta_delay_model( route_profiler, placer_opts, router_opts, /*measure_directconnect=*/true, @@ -237,14 +232,6 @@ void OverrideDelayModel::compute( compute_override_delay_model(route_profiler, router_opts); } -void SimpleDelayModel::compute( - RouterDelayProfiler& router, - const t_placer_opts& /*placer_opts*/, - const t_router_opts& /*router_opts*/, - int /*longest_length*/) { - delays_ = compute_simple_delay_model(router); -} - /******* File Accessible Functions **********/ std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) { @@ -1028,36 +1015,7 @@ static vtr::NdMatrix compute_delta_delay_model( return delta_delays; } -static vtr::NdMatrix compute_simple_delay_model(RouterDelayProfiler& route_profiler) { - const auto& grid = g_vpr_ctx.device().grid; - int num_physical_tile_types = static_cast(g_vpr_ctx.device().physical_tile_types.size()); - // Initializing the delay matrix to 
[num_physical_types][num_layers][num_layers][width][height] - // The second index related to the layer that the source location is on and the third index is for the sink layer - vtr::NdMatrix delta_delays({static_cast(num_physical_tile_types), - static_cast(grid.get_num_layers()), - static_cast(grid.get_num_layers()), - grid.width(), - grid.height()}); - - for (int physical_tile_type_idx = 0; physical_tile_type_idx < num_physical_tile_types; ++physical_tile_type_idx) { - for (int from_layer = 0; from_layer < grid.get_num_layers(); ++from_layer) { - for (int to_layer = 0; to_layer < grid.get_num_layers(); ++to_layer) { - for (int dx = 0; dx < static_cast(grid.width()); ++dx) { - for (int dy = 0; dy < static_cast(grid.height()); ++dy) { - float min_delay = route_profiler.get_min_delay(physical_tile_type_idx, - from_layer, - to_layer, - dx, - dy); - delta_delays[physical_tile_type_idx][from_layer][to_layer][dx][dy] = min_delay; - } - } - } - } - } - return delta_delays; -} //Finds a src_rr and sink_rr appropriate for measuring the delay of the current direct specification static bool find_direct_connect_sample_locations(const t_direct_inf* direct,