From ac4f788d6b41466f552711932c3b3d09218a9415 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 16 Jul 2024 17:42:42 -0400 Subject: [PATCH 01/14] add golden results for 3d regression test --- .../vpr_tight_floorplan_3d/config/config.txt | 2 +- .../vpr_tight_floorplan_3d/config/golden_results.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/config/golden_results.txt diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/config/config.txt index 6259b6d82ad..f536515b9ef 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/config/config.txt @@ -21,7 +21,7 @@ parse_file=vpr_standard.txt qor_parse_file=qor_standard.txt # Pass requirements -#pass_requirements_file=pass_requirements.txt +pass_requirements_file=pass_requirements.txt # Script parameters script_params_common =-starting_stage vpr --route_chan_width 300 --max_router_iterations 400 --router_lookahead map --initial_pres_fac 1.0 --router_profiler_astar_fac 1.5 --seed 3 --device neuron3d diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/config/golden_results.txt new file mode 100644 index 00000000000..7b7954f2f05 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/config/golden_results.txt @@ -0,0 +1,4 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io 
num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +3d_full_OPIN_inter_die_stratixiv_arch.timing.xml neuron_stratixiv_arch_timing.blif common_-sdc_file_sdc/samples/neuron_stratixiv_arch_timing.sdc_-read_vpr_constraints_tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/one_big_partition.xml 835.38 
vpr 2.86 GiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 42 -1 -1 success 16adbfa-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-15T00:59:58 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 2998168 42 35 119888 86875 1 50931 3418 92 68 12512 -1 neuron3d 1871.5 MiB 125.92 471295 2911586 1070954 1811088 29544 2883.9 MiB 225.67 2.10 7.73071 -71900.2 -6.73071 5.03261 0.22 0.369023 0.308324 43.731 35.9598 -1 672628 24 0 0 2.71622e+08 21708.9 79.02 68.1319 57.7539 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +3d_full_OPIN_inter_die_stratixiv_arch.timing.xml neuron_stratixiv_arch_timing.blif common_-sdc_file_sdc/samples/neuron_stratixiv_arch_timing.sdc_-read_vpr_constraints_tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/half_blocks_right_left.xml 820.30 vpr 2.86 GiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 42 -1 -1 success 16adbfa-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-15T00:59:58 gh-actions-runner-vtr-auto-spawned49 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 2995656 42 35 119888 86875 1 50982 3427 92 68 12512 -1 neuron3d 1869.5 MiB 127.02 455014 2973417 1101001 1762977 109439 2881.6 MiB 223.03 1.76 7.42684 -70436.4 -6.42684 5.75387 0.23 0.367143 0.304401 44.7203 37.4852 -1 654498 28 0 0 2.71622e+08 21708.9 78.86 71.0006 61.0273 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +3d_full_OPIN_inter_die_stratixiv_arch.timing.xml neuron_stratixiv_arch_timing.blif common_-sdc_file_sdc/samples/neuron_stratixiv_arch_timing.sdc_-read_vpr_constraints_tasks/regression_tests/vtr_reg_nightly_test5/vpr_tight_floorplan_3d/half_blocks_up_down.xml 812.45 vpr 2.86 GiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 42 -1 -1 success 16adbfa-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-07-15T00:59:58 gh-actions-runner-vtr-auto-spawned49 
/root/vtr-verilog-to-routing/vtr-verilog-to-routing 2995760 42 35 119888 86875 1 50982 3427 92 68 12512 -1 neuron3d 1869.6 MiB 125.77 465634 3050895 1119051 1501125 430719 2881.9 MiB 206.05 1.47 7.58328 -72552.7 -6.58328 5.5951 0.22 0.365879 0.300546 45.7365 37.5853 -1 663695 23 0 0 2.71622e+08 21708.9 79.71 69.6049 58.853 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 From 4877829ecbdd6603b4fe55f6fa636f4defa27a4c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 1 Aug 2024 09:54:26 +0000 Subject: [PATCH 02/14] Bump libs/EXTERNAL/libcatch2 from `4e8d92b` to `33e24b1` Bumps [libs/EXTERNAL/libcatch2](https://github.com/catchorg/Catch2) from `4e8d92b` to `33e24b1`. - [Release notes](https://github.com/catchorg/Catch2/releases) - [Commits](https://github.com/catchorg/Catch2/compare/4e8d92bf02f7d1c8006a0e7a5ecabd8e62d98502...33e24b14fcf95c1c31c8d6b68f445ebb12e026cc) --- updated-dependencies: - dependency-name: libs/EXTERNAL/libcatch2 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- libs/EXTERNAL/libcatch2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/EXTERNAL/libcatch2 b/libs/EXTERNAL/libcatch2 index 4e8d92bf02f..33e24b14fcf 160000 --- a/libs/EXTERNAL/libcatch2 +++ b/libs/EXTERNAL/libcatch2 @@ -1 +1 @@ -Subproject commit 4e8d92bf02f7d1c8006a0e7a5ecabd8e62d98502 +Subproject commit 33e24b14fcf95c1c31c8d6b68f445ebb12e026cc From e434d147a9bd9f0e196aaa774a4008ab1eca868e Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 2 Aug 2024 08:53:38 -0400 Subject: [PATCH 03/14] [vpr][rr_graph] add new switch types when rr graph is read and run-flat is enabled --- vpr/src/route/rr_graph.cpp | 76 +++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index f0e8e6f4b16..b184d304b05 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -271,7 +271,8 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, const int j, t_rr_edge_info_set& rr_edges_to_create, const int delayless_switch, - t_physical_tile_type_ptr physical_type_ptr); + t_physical_tile_type_ptr physical_type_ptr, + bool is_remapped); static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, std::map& arch_sw_inf_map, @@ -374,6 +375,8 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, t_logical_block_type_ptr logical_block, const t_pb* pb, const t_cluster_pin_chain& nodes_to_collapse, + float R_minW_nmos, + float R_minW_pmos, int rel_cap, int layer, int i, @@ -741,6 +744,20 @@ void create_rr_graph(const t_graph_type graph_type, } if (is_flat) { + short delayless_switch = OPEN; + if (load_rr_graph) { + const auto& rr_switches = device_ctx.rr_graph.rr_switch(); + for (int switch_id = 0; switch_id < rr_switches.size(); switch_id++){ + const auto& rr_switch = rr_switches[RRSwitchId(switch_id)]; + if (rr_switch.name.find("delayless") != std::string::npos) { + delayless_switch = 
static_cast(switch_id); + break; + } + } + } else { + delayless_switch = det_routing_arch->delayless_switch; + } + VTR_ASSERT(delayless_switch != OPEN); build_intra_cluster_rr_graph(graph_type, grid, block_types, @@ -2021,6 +2038,9 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder /* If Fc gets clipped, this will be flagged to true */ *Fc_clipped = false; + /* This function is called to build the general routing graph resoruces. Thus, the edges are not remapped yet.*/ + bool is_remapped = false; + int num_edges = 0; /* Connection SINKS and SOURCES to their pins - Initializing IPINs/OPINs. */ for (int layer = 0; layer < grid.get_num_layers(); ++layer) { @@ -2053,7 +2073,8 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder j, rr_edges_to_create, delayless_switch, - physical_tile); + physical_tile, + is_remapped); //Create the actual SOURCE->OPIN, IPIN->SINK edges uniquify_edges(rr_edges_to_create); @@ -2270,7 +2291,8 @@ static void alloc_and_load_intra_cluster_rr_graph(RRGraphBuilder& rr_graph_build j, rr_edges_to_create, delayless_switch, - physical_tile); + physical_tile, + load_rr_graph); //Create the actual SOURCE->OPIN, IPIN->SINK edges uniquify_edges(rr_edges_to_create); @@ -2454,7 +2476,8 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, const int j, t_rr_edge_info_set& rr_edges_to_create, const int delayless_switch, - t_physical_tile_type_ptr physical_type_ptr) { + t_physical_tile_type_ptr physical_type_ptr, + bool is_remapped) { for (auto class_num : class_num_vec) { const auto& pin_list = get_pin_list_from_class_physical_num(physical_type_ptr, class_num); auto class_type = get_class_type_from_class_physical_num(physical_type_ptr, class_num); @@ -2474,11 +2497,11 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, auto pin_type = get_pin_type_from_pin_physical_num(physical_type_ptr, pin_num); if (class_type == DRIVER) { VTR_ASSERT(pin_type == DRIVER); - 
rr_edges_to_create.emplace_back(class_rr_node_id, pin_rr_node_id, delayless_switch, false); + rr_edges_to_create.emplace_back(class_rr_node_id, pin_rr_node_id, delayless_switch, is_remapped); } else { VTR_ASSERT(class_type == RECEIVER); VTR_ASSERT(pin_type == RECEIVER); - rr_edges_to_create.emplace_back(pin_rr_node_id, class_rr_node_id, delayless_switch, false); + rr_edges_to_create.emplace_back(pin_rr_node_id, class_rr_node_id, delayless_switch, is_remapped); } } } @@ -2682,6 +2705,8 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, logical_block, pb, nodes_to_collapse, + R_minW_nmos, + R_minW_pmos, rel_cap, layer, i, @@ -2714,6 +2739,8 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, t_logical_block_type_ptr logical_block, const t_pb* pb, const t_cluster_pin_chain& nodes_to_collapse, + float R_minW_nmos, + float R_minW_pmos, int rel_cap, int layer, int i, @@ -2773,23 +2800,15 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, conn_pin_physical_num); if (is_remapped) { - bool found = false; + auto& all_sw_inf = g_vpr_ctx.mutable_device().all_sw_inf; float delay = g_vpr_ctx.device().all_sw_inf.at(sw_idx).Tdel(); - const auto& rr_switches = rr_graph_builder.rr_switch(); - for (int sw_id = 0; sw_id < (int)rr_switches.size(); sw_id++) { - const auto& rr_switch = rr_switches[RRSwitchId(sw_id)]; - if (rr_switch.intra_tile) { - if (rr_switch.Tdel == delay) { - sw_idx = sw_id; - found = true; - break; - } - } - } - // If the graph is loaded from a file, we expect that all sw types are already listed there since currently, we are not doing any further - // Optimization. If the optimization done when the rr graph file was generated is different from the current optimization, in the case that - // these optimizations create different RR switches, this VTR ASSERT can be removed. 
- VTR_ASSERT(found); + bool is_new_sw; + std::tie(is_new_sw, sw_idx) = find_create_intra_cluster_sw(rr_graph_builder, + all_sw_inf, + R_minW_nmos, + R_minW_pmos, + is_remapped, + delay); } rr_edges_to_create.emplace_back(parent_pin_node_id, conn_pin_node_id, sw_idx, is_remapped); } @@ -2960,19 +2979,10 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, is_rr_sw_id, delay); - if (!is_rr_sw_id && is_new_sw) { - // Currently we assume that if rr graph is read from a file, we shouldn't get into this block - VTR_ASSERT(!load_rr_graph); - // The internal edges are added after switch_fanin_remap is initialized; thus, if a new arch_sw is added, - // switch _fanin_remap should be updated. - t_rr_switch_inf rr_sw_inf = create_rr_switch_from_arch_switch(create_internal_arch_sw(delay), - R_minW_nmos, - R_minW_pmos); - auto rr_sw_id = rr_graph_builder.add_rr_switch(rr_sw_inf); - // If rr graph is loaded from a file, switch_fanin_remap is going to be empty + if (is_new_sw) { if (!load_rr_graph) { auto& switch_fanin_remap = g_vpr_ctx.mutable_device().switch_fanin_remap; - switch_fanin_remap.push_back({{UNDEFINED, size_t(rr_sw_id)}}); + switch_fanin_remap.push_back({{UNDEFINED, size_t(sw_id)}}); } } From 9642bb4ea9a05344dd19177eb35464e37dfe5170 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 2 Aug 2024 09:45:05 -0400 Subject: [PATCH 04/14] fix typo --- vpr/src/route/rr_graph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index b184d304b05..37dde2fbee2 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -747,7 +747,7 @@ void create_rr_graph(const t_graph_type graph_type, short delayless_switch = OPEN; if (load_rr_graph) { const auto& rr_switches = device_ctx.rr_graph.rr_switch(); - for (int switch_id = 0; switch_id < rr_switches.size(); switch_id++){ + for (int switch_id = 0; switch_id < static_cast(rr_switches.size()); switch_id++){ const auto& 
rr_switch = rr_switches[RRSwitchId(switch_id)]; if (rr_switch.name.find("delayless") != std::string::npos) { delayless_switch = static_cast(switch_id); @@ -762,7 +762,7 @@ void create_rr_graph(const t_graph_type graph_type, grid, block_types, device_ctx.rr_graph, - det_routing_arch->delayless_switch, + delayless_switch, det_routing_arch->R_minW_nmos, det_routing_arch->R_minW_pmos, mutable_device_ctx.rr_graph_builder, @@ -794,7 +794,7 @@ void create_rr_graph(const t_graph_type graph_type, // When this function is called in any stage other than routing, the is_flat flag passed to this function is false, regardless of the flag passed // through command line. So, the graph corresponding to global resources will be created and written down to file if needed. During routing, if flat-routing // is enabled, intra-cluster resources will be added to the graph, but this new bigger graph will not be written down. - if (!det_routing_arch->write_rr_graph_filename.empty() && !is_flat) { + if (!det_routing_arch->write_rr_graph_filename.empty()) { write_rr_graph(&mutable_device_ctx.rr_graph_builder, &mutable_device_ctx.rr_graph, device_ctx.physical_tile_types, From f66b4bbf685010a4a2e5ab701667f84521973f9e Mon Sep 17 00:00:00 2001 From: amin1377 Date: Fri, 2 Aug 2024 10:31:04 -0400 Subject: [PATCH 05/14] don't write down flat rr graph --- vpr/src/route/rr_graph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 37dde2fbee2..81c759f9708 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -794,7 +794,7 @@ void create_rr_graph(const t_graph_type graph_type, // When this function is called in any stage other than routing, the is_flat flag passed to this function is false, regardless of the flag passed // through command line. So, the graph corresponding to global resources will be created and written down to file if needed. 
During routing, if flat-routing // is enabled, intra-cluster resources will be added to the graph, but this new bigger graph will not be written down. - if (!det_routing_arch->write_rr_graph_filename.empty()) { + if (!det_routing_arch->write_rr_graph_filename.empty() && !is_flat) { write_rr_graph(&mutable_device_ctx.rr_graph_builder, &mutable_device_ctx.rr_graph, device_ctx.physical_tile_types, From e4431f34caee7f3bab3add9c20e5cb82a3ab8284 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 8 Aug 2024 09:37:18 -0400 Subject: [PATCH 06/14] rename is_rampped to switches_remapped --- vpr/src/route/rr_graph.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 81c759f9708..2890fe12802 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -272,7 +272,7 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create, const int delayless_switch, t_physical_tile_type_ptr physical_type_ptr, - bool is_remapped); + bool switches_remapped); static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, std::map& arch_sw_inf_map, @@ -381,7 +381,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, int layer, int i, int j, - bool is_remapped); + bool switches_remapped); /** * Edges going in/out of collapse nodes are not added by the normal routine. This function add those edges @@ -2039,7 +2039,7 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder *Fc_clipped = false; /* This function is called to build the general routing graph resoruces. Thus, the edges are not remapped yet.*/ - bool is_remapped = false; + bool switches_remapped = false; int num_edges = 0; /* Connection SINKS and SOURCES to their pins - Initializing IPINs/OPINs. 
*/ @@ -2074,7 +2074,7 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder rr_edges_to_create, delayless_switch, physical_tile, - is_remapped); + switches_remapped); //Create the actual SOURCE->OPIN, IPIN->SINK edges uniquify_edges(rr_edges_to_create); @@ -2477,7 +2477,7 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create, const int delayless_switch, t_physical_tile_type_ptr physical_type_ptr, - bool is_remapped) { + bool switches_remapped) { for (auto class_num : class_num_vec) { const auto& pin_list = get_pin_list_from_class_physical_num(physical_type_ptr, class_num); auto class_type = get_class_type_from_class_physical_num(physical_type_ptr, class_num); @@ -2497,11 +2497,11 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, auto pin_type = get_pin_type_from_pin_physical_num(physical_type_ptr, pin_num); if (class_type == DRIVER) { VTR_ASSERT(pin_type == DRIVER); - rr_edges_to_create.emplace_back(class_rr_node_id, pin_rr_node_id, delayless_switch, is_remapped); + rr_edges_to_create.emplace_back(class_rr_node_id, pin_rr_node_id, delayless_switch, switches_remapped); } else { VTR_ASSERT(class_type == RECEIVER); VTR_ASSERT(pin_type == RECEIVER); - rr_edges_to_create.emplace_back(pin_rr_node_id, class_rr_node_id, delayless_switch, is_remapped); + rr_edges_to_create.emplace_back(pin_rr_node_id, class_rr_node_id, delayless_switch, switches_remapped); } } } @@ -2745,7 +2745,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, int layer, int i, int j, - bool is_remapped) { + bool switches_remapped) { auto pin_num_range = get_pb_pins(physical_type, sub_tile, logical_block, @@ -2799,7 +2799,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, pin_physical_num, conn_pin_physical_num); - if (is_remapped) { + if (switches_remapped) { auto& all_sw_inf = g_vpr_ctx.mutable_device().all_sw_inf; float delay = g_vpr_ctx.device().all_sw_inf.at(sw_idx).Tdel(); bool 
is_new_sw; @@ -2807,10 +2807,10 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, all_sw_inf, R_minW_nmos, R_minW_pmos, - is_remapped, + switches_remapped, delay); } - rr_edges_to_create.emplace_back(parent_pin_node_id, conn_pin_node_id, sw_idx, is_remapped); + rr_edges_to_create.emplace_back(parent_pin_node_id, conn_pin_node_id, sw_idx, switches_remapped); } } } From 89c5c1888f8fa339723b7c2cd3b02538d33b9695 Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 8 Aug 2024 10:53:59 -0400 Subject: [PATCH 07/14] [vpr][rr_graph] add get_delayless switch --- vpr/src/route/rr_graph.cpp | 49 ++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 2890fe12802..2f62b4aff69 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -264,6 +264,18 @@ static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, const int delayless_switch, t_physical_tile_type_ptr physical_type_ptr); +/** + * Add the edges between IPIN to SINK and SOURCE to OPIN to rr_edges_to_create + * @param rr_graph_builder RR Graph Builder object which contains the RR Graph storage + * @param class_num_vec Class physical numbers to add the edges connected to them + * @param layer The layer number of the block to add the SINK/SRC connections of it. + * @param i The x location of the block to add the SINK/SRC connections of it. + * @param j The y location of the block to add the SINK/SRC connections of it + * @param rr_edges_to_create An object which store all of the edges created in this function. + * @param delayless_switch Switch ID of the delayless switch. + * @param physical_type_ptr A pointer to the physical type of the block for which the edges are created. 
+ * @param switches_remapped A flag to indicate whether edge switch IDs are remapped + */ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, const std::vector& class_num_vec, const int layer, @@ -659,6 +671,9 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, bool is_flat, bool load_rr_graph); +static short get_delayless_switch_id (t_det_routing_arch* det_routing_arch, + bool load_rr_graph); + /******************* Subroutine definitions *******************************/ void create_rr_graph(const t_graph_type graph_type, @@ -744,19 +759,7 @@ void create_rr_graph(const t_graph_type graph_type, } if (is_flat) { - short delayless_switch = OPEN; - if (load_rr_graph) { - const auto& rr_switches = device_ctx.rr_graph.rr_switch(); - for (int switch_id = 0; switch_id < static_cast(rr_switches.size()); switch_id++){ - const auto& rr_switch = rr_switches[RRSwitchId(switch_id)]; - if (rr_switch.name.find("delayless") != std::string::npos) { - delayless_switch = static_cast(switch_id); - break; - } - } - } else { - delayless_switch = det_routing_arch->delayless_switch; - } + short delayless_switch = get_delayless_switch_id(det_routing_arch, load_rr_graph); VTR_ASSERT(delayless_switch != OPEN); build_intra_cluster_rr_graph(graph_type, grid, @@ -1529,6 +1532,26 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, is_flat); } +static short get_delayless_switch_id (t_det_routing_arch* det_routing_arch, + bool load_rr_graph) { + const auto& device_ctx = g_vpr_ctx.device(); + short delayless_switch; + if (load_rr_graph) { + const auto& rr_switches = device_ctx.rr_graph.rr_switch(); + for (int switch_id = 0; switch_id < static_cast(rr_switches.size()); switch_id++){ + const auto& rr_switch = rr_switches[RRSwitchId(switch_id)]; + if (rr_switch.name.find("delayless") != std::string::npos) { + delayless_switch = static_cast(switch_id); + break; + } + } + } else { + delayless_switch = det_routing_arch->delayless_switch; + } 
+ + return delayless_switch; +} + void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder, const t_det_routing_arch& det_routing_arch, t_physical_tile_type_ptr physical_tile, From fe44c0436e3306c7ff1ea2f8918cf0ceab265eee Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 8 Aug 2024 10:59:41 -0400 Subject: [PATCH 08/14] comment on get_delayless_switch_id --- vpr/src/route/rr_graph.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 2f62b4aff69..8bb38d06e9e 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -671,6 +671,12 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, bool is_flat, bool load_rr_graph); +/** + * Return the ID for delayless switch. If the RR graph is loaded from a file, then the assumption + * is that the returned ID should be a RR switch ID not architecture ID. + * @param det_routing_arch Contain the information from architecture file + * @param load_rr_graph Indicate whether the RR graph is loaded from a file + */ static short get_delayless_switch_id (t_det_routing_arch* det_routing_arch, bool load_rr_graph); From 84ef29620ea46842ef7c7a5c3450a772248ae3fa Mon Sep 17 00:00:00 2001 From: amin1377 Date: Thu, 8 Aug 2024 11:09:17 -0400 Subject: [PATCH 09/14] [vpr][rr_graph] fix iterator type --- vpr/src/route/rr_graph.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 8bb38d06e9e..1a86b6ea900 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -1544,7 +1544,7 @@ static short get_delayless_switch_id (t_det_routing_arch* det_routing_arch, short delayless_switch; if (load_rr_graph) { const auto& rr_switches = device_ctx.rr_graph.rr_switch(); - for (int switch_id = 0; switch_id < static_cast(rr_switches.size()); switch_id++){ + for (size_t switch_id = 0; switch_id < rr_switches.size(); switch_id++){ const auto& 
rr_switch = rr_switches[RRSwitchId(switch_id)]; if (rr_switch.name.find("delayless") != std::string::npos) { delayless_switch = static_cast(switch_id); @@ -2067,7 +2067,8 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder /* If Fc gets clipped, this will be flagged to true */ *Fc_clipped = false; - /* This function is called to build the general routing graph resoruces. Thus, the edges are not remapped yet.*/ + /* This function is called to build the general routing graph resoruces. Thus, + the edges are not remapped yet.*/ bool switches_remapped = false; int num_edges = 0; @@ -3008,11 +3009,13 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, is_rr_sw_id, delay); - if (is_new_sw) { - if (!load_rr_graph) { - auto& switch_fanin_remap = g_vpr_ctx.mutable_device().switch_fanin_remap; - switch_fanin_remap.push_back({{UNDEFINED, size_t(sw_id)}}); - } + /*If the switch found inside the cluster has not seen before and RR graph is not read from a file, + we need to add this switch to switch_fanin_remap data strcutre which is used later to remap switch IDs + from architecture ID to RR graph switch ID. The reason why we don't this when RR graph is read from a file + is that in that case, the switch IDs of edges are alreay RR graph switch IDs. 
*/ + if (is_new_sw && !load_rr_graph) { + auto& switch_fanin_remap = g_vpr_ctx.mutable_device().switch_fanin_remap; + switch_fanin_remap.push_back({{UNDEFINED, size_t(sw_id)}}); } rr_edges_to_create.emplace_back(src_pair.first, sink_rr_node_id, sw_id, is_rr_sw_id); From 2d630434a51e4267f9f19dda2151e04ba19bfddd Mon Sep 17 00:00:00 2001 From: Mahesh Madhav Date: Sat, 10 Aug 2024 04:37:52 +0000 Subject: [PATCH 10/14] Fixes for gcc-15 build --- libs/librrgraph/src/base/rr_graph_storage.h | 3 ++- libs/librrgraph/src/base/rr_node_types.h | 1 + libs/libvtrutil/src/vtr_string_interning.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libs/librrgraph/src/base/rr_graph_storage.h b/libs/librrgraph/src/base/rr_graph_storage.h index 570576ba20e..db791e4cda3 100644 --- a/libs/librrgraph/src/base/rr_graph_storage.h +++ b/libs/librrgraph/src/base/rr_graph_storage.h @@ -15,8 +15,9 @@ #include "vtr_memory.h" #include "vtr_strong_id_range.h" #include "vtr_array_view.h" -#include +#include #include +#include /* Main structure describing one routing resource node. 
Everything in * * this structure should describe the graph -- information needed only * diff --git a/libs/librrgraph/src/base/rr_node_types.h b/libs/librrgraph/src/base/rr_node_types.h index 1b38848f21b..3e9f89d4c82 100644 --- a/libs/librrgraph/src/base/rr_node_types.h +++ b/libs/librrgraph/src/base/rr_node_types.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "vtr_range.h" #include "vtr_ndmatrix.h" diff --git a/libs/libvtrutil/src/vtr_string_interning.h b/libs/libvtrutil/src/vtr_string_interning.h index bbf22766170..65b611e6015 100644 --- a/libs/libvtrutil/src/vtr_string_interning.h +++ b/libs/libvtrutil/src/vtr_string_interning.h @@ -48,6 +48,7 @@ #include #include #include +#include #include #include From 4657a7d8590b2befc0fd5634a9b0351431362d2d Mon Sep 17 00:00:00 2001 From: amin1377 Date: Mon, 12 Aug 2024 18:01:19 -0400 Subject: [PATCH 11/14] [vpr][rr_graph] add switch to rr switch if new arch switch is found --- vpr/src/route/rr_graph.cpp | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 1a86b6ea900..5897d2433a9 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -2476,17 +2476,6 @@ static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, continue; } auto pin_type = get_pin_type_from_pin_physical_num(physical_type_ptr, pin_num); - /*int sw_id = -1; - * if (is_primitive || pin_type == RECEIVER) { - * VTR_ASSERT(logical_block != nullptr); - * float primitive_comb_delay = get_pin_primitive_comb_delay(physical_type_ptr, - * logical_block, - * pin_num); - * sw_id = find_create_intra_cluster_sw_arch_idx(arch_sw_inf_map, - * primitive_comb_delay); - * } else { - * sw_id = delayless_switch; - * }*/ if (class_type == DRIVER) { VTR_ASSERT(pin_type == DRIVER); rr_edges_to_create.emplace_back(class_rr_node_id, pin_rr_node_id, delayless_switch, false); @@ -3009,15 +2998,6 @@ static void 
add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, is_rr_sw_id, delay); - /*If the switch found inside the cluster has not seen before and RR graph is not read from a file, - we need to add this switch to switch_fanin_remap data strcutre which is used later to remap switch IDs - from architecture ID to RR graph switch ID. The reason why we don't this when RR graph is read from a file - is that in that case, the switch IDs of edges are alreay RR graph switch IDs. */ - if (is_new_sw && !load_rr_graph) { - auto& switch_fanin_remap = g_vpr_ctx.mutable_device().switch_fanin_remap; - switch_fanin_remap.push_back({{UNDEFINED, size_t(sw_id)}}); - } - rr_edges_to_create.emplace_back(src_pair.first, sink_rr_node_id, sw_id, is_rr_sw_id); } } @@ -4976,6 +4956,18 @@ static std::pair find_create_intra_cluster_sw(RRGraphBuilder& rr_grap // If this assumption proven to not be accurate, the implementation needs to be changed. VTR_ASSERT(arch_sw.fixed_Tdel()); + t_rr_switch_inf new_rr_switch_inf = create_rr_switch_from_arch_switch(create_internal_arch_sw(delay), + R_minW_nmos, + R_minW_pmos); + RRSwitchId rr_switch_id = rr_graph.add_rr_switch(new_rr_switch_inf); + + /*If the switch found inside the cluster has not seen before and RR graph is not read from a file, + we need to add this switch to switch_fanin_remap data strcutre which is used later to remap switch IDs + from architecture ID to RR graph switch ID. The reason why we don't this when RR graph is read from a file + is that in that case, the switch IDs of edges are alreay RR graph switch IDs. 
*/ + auto& switch_fanin_remap = g_vpr_ctx.mutable_device().switch_fanin_remap; + switch_fanin_remap.push_back({{UNDEFINED, size_t(rr_switch_id)}}); + return std::make_pair(true, new_key_num); } else { return std::make_pair(false, find_res->first); From e24d31ad19380c8bab56a624d47d52efe7e1259e Mon Sep 17 00:00:00 2001 From: Mahesh Madhav Date: Tue, 13 Aug 2024 04:04:24 +0000 Subject: [PATCH 12/14] Consistency in iterator declaration --- libs/libvtrutil/src/vtr_ragged_matrix.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/libvtrutil/src/vtr_ragged_matrix.h b/libs/libvtrutil/src/vtr_ragged_matrix.h index 18ba18f9b58..862047e0b1e 100644 --- a/libs/libvtrutil/src/vtr_ragged_matrix.h +++ b/libs/libvtrutil/src/vtr_ragged_matrix.h @@ -238,11 +238,11 @@ class FlatRaggedMatrix { return !(*this == other); } - int operator-(const RowLengthIterator& other) { + difference_type operator-(const RowLengthIterator& other) { return irow_ - other.irow_; } - size_t operator*() { + value_type operator*() { //Call the callback to get the row length return callback_(Index0(irow_)); } From be4f61baf5e3d65aa7527c1bd5a1a8f73bc7804c Mon Sep 17 00:00:00 2001 From: amin1377 Date: Tue, 13 Aug 2024 10:47:16 -0400 Subject: [PATCH 13/14] [vpr][rr_graph] initialized delayless switch --- vpr/src/route/rr_graph.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 5897d2433a9..87c3155c752 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -677,8 +677,8 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, * @param det_routing_arch Contain the information from architecture file * @param load_rr_graph Indicate whether the RR graph is loaded from a file */ -static short get_delayless_switch_id (t_det_routing_arch* det_routing_arch, - bool load_rr_graph); +static int get_delayless_switch_id(t_det_routing_arch* det_routing_arch, + bool 
load_rr_graph); /******************* Subroutine definitions *******************************/ @@ -765,7 +765,7 @@ void create_rr_graph(const t_graph_type graph_type, } if (is_flat) { - short delayless_switch = get_delayless_switch_id(det_routing_arch, load_rr_graph); + int delayless_switch = get_delayless_switch_id(det_routing_arch, load_rr_graph); VTR_ASSERT(delayless_switch != OPEN); build_intra_cluster_rr_graph(graph_type, grid, @@ -1538,21 +1538,21 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, is_flat); } -static short get_delayless_switch_id (t_det_routing_arch* det_routing_arch, - bool load_rr_graph) { +static int get_delayless_switch_id(t_det_routing_arch* det_routing_arch, + bool load_rr_graph) { const auto& device_ctx = g_vpr_ctx.device(); - short delayless_switch; + int delayless_switch = OPEN; if (load_rr_graph) { const auto& rr_switches = device_ctx.rr_graph.rr_switch(); for (size_t switch_id = 0; switch_id < rr_switches.size(); switch_id++){ const auto& rr_switch = rr_switches[RRSwitchId(switch_id)]; if (rr_switch.name.find("delayless") != std::string::npos) { - delayless_switch = static_cast<short>(switch_id); + delayless_switch = static_cast<int>(switch_id); break; } } } else { - delayless_switch = det_routing_arch->delayless_switch; + delayless_switch = static_cast<int>(det_routing_arch->delayless_switch); } return delayless_switch; From c61710311481a92bf833d2a7211010f92b9da810 Mon Sep 17 00:00:00 2001 From: vaughnbetz Date: Thu, 15 Aug 2024 19:22:44 -0400 Subject: [PATCH 14/14] Update vpr_constraints.rst summary to cover placement and global signals --- doc/src/vpr/vpr_constraints.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/src/vpr/vpr_constraints.rst b/doc/src/vpr/vpr_constraints.rst index a56fecf358b..29b00c204b1 100644 --- a/doc/src/vpr/vpr_constraints.rst +++ b/doc/src/vpr/vpr_constraints.rst @@ -2,9 +2,7 @@ VPR Constraints ========================= .. 
_vpr_constraints: -VPR allows users to run the flow with placement constraints that enable primitives to be locked down to a specific region on the chip and global routing constraints that facilitate the routing of global nets through clock networks. - -Users can specify these constraints through a constraints file in XML format, as shown in the format below. +Users can specify placement and/or global routing constraints on all or part of a design through a constraints file in XML format, as shown in the format below. These constraints are optional and allow detailed control of the region on the chip in which parts of the design are placed, and of the routing of global nets through dedicated (usually clock) networks. .. code-block:: xml :caption: The overall format of a VPR constraints file