From 70574f764de77f4c29a4287911499b94dcacb56c Mon Sep 17 00:00:00 2001 From: rgantonio Date: Tue, 2 Apr 2024 12:34:26 +0200 Subject: [PATCH 1/9] docs: Updated schema --- docs/schema/snitch_cluster.schema.json | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/schema/snitch_cluster.schema.json b/docs/schema/snitch_cluster.schema.json index 752f968ca..beb16c974 100644 --- a/docs/schema/snitch_cluster.schema.json +++ b/docs/schema/snitch_cluster.schema.json @@ -444,6 +444,42 @@ "title": "SNAX TCDM Port", "description": "Number of ports dedicated for an accelerator", "default": 0 + }, + "snax_connect_tcdm_wide_only": { + "type": "boolean", + "title": "Connect SNAX TCDM to Wide TCDM MUX", + "description": "Connects a SNAX accelerator's TCDM ports to the wide TCDM MUX only. If false, it connects to narrow TCDM only.", + "default": false + }, + "snax_connect_narrow_wide_mix": { + "type": "boolean", + "title": "Connect SNAX TCDM to narrow and wide MUX", + "description": "Connects a SNAX accelerator's TCDM ports to both narrow and wide. If true, it uses the indices specified below.", + "default": false + }, + "snax_narrow_tcdm_start_idx": { + "type": "integer", + "title": "Narrow TCDM Starting Index", + "description": "The starting index of the narrow TCDM port. Indexing starts from 0.", + "default": 0 + }, + "snax_narrow_tcdm_end_idx": { + "type": "integer", + "title": "Narrow TCDM Ending Index", + "description": "The ending index of the narrow TCDM port. Indexing starts from 0.", + "default": 0 + }, + "snax_wide_tcdm_start_idx": { + "type": "integer", + "title": "Wide TCDM Starting Index", + "description": "The starting index of the wide TCDM port. Indexing starts from 0.", + "default": 0 + }, + "snax_wide_tcdm_end_idx": { + "type": "integer", + "title": "Wide TCDM Ending Index", + "description": "The ending index of the wide TCDM port. 
Indexing starts from 0.", + "default": 0 } } }, From 48e46a1ff5a0bbc2dcf05a69a3375abfda5a1bbe Mon Sep 17 00:00:00 2001 From: rgantonio Date: Tue, 2 Apr 2024 12:34:46 +0200 Subject: [PATCH 2/9] hw: Add ports in Snitch cluster --- hw/snitch_cluster/src/snitch_cluster.sv | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv index f86b8111e..a7fb7f185 100644 --- a/hw/snitch_cluster/src/snitch_cluster.sv +++ b/hw/snitch_cluster/src/snitch_cluster.sv @@ -97,7 +97,13 @@ module snitch_cluster parameter fpnew_pkg::fpu_implementation_t FPUImplementation [NrCores] = '{default: fpnew_pkg::fpu_implementation_t'(0)}, /// Total Number of SNAX TCDM ports - parameter int unsigned TotalSnaxTcdmPorts = 0, + parameter int unsigned SnaxNarrowForWidePorts = 0, + parameter int unsigned SnaxNarrowPorts = 0, + parameter int unsigned TotalSnaxTcdmPorts = SnaxNarrowForWidePorts + SnaxNarrowPorts, + parameter int unsigned SnaxNarrowStartIdx [NrCores] = '{default: 0}, + parameter int unsigned SnaxNarrowEndIdx [NrCores] = '{default: 0}, + parameter int unsigned SnaxWideStartIdx [NrCores] = '{default: 0}, + parameter int unsigned SnaxWideEndIdx [NrCores] = '{default: 0}, /// SNAX Acc Narrow Wide Selection parameter bit [NrCores-1:0] ConnectSnaxAccWide = 0, /// Physical Memory Attribute Configuration From 6b6563a8e23e36e0dda52d63d0c93c0db2090893 Mon Sep 17 00:00:00 2001 From: rgantonio Date: Tue, 2 Apr 2024 12:35:01 +0200 Subject: [PATCH 3/9] tpl: Update wrapper --- .../src/snitch_cluster_wrapper.sv.tpl | 230 ++++++++++++------ 1 file changed, 151 insertions(+), 79 deletions(-) diff --git a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl index b28954bd5..1aa83ed1b 100644 --- a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl +++ b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl @@ -1,35 +1,164 @@ -// Copyright 2021 ETH Zurich and 
University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. -// SPDX-License-Identifier: SHL-0.51 - ${disclaimer} +<% +# Just some working variables +tcdm_offset_start = 0 +tcdm_offset_stop = -1 +total_snax_tcdm_ports = 0 +snax_core_acc = {} + +# Make me elegant later :P +narrow_start_idx_list = [] +narrow_end_idx_list = [] +wide_start_idx_list = [] +wide_end_idx_list = [] + +# Cycle through each core +# and check if an accelerator setting exists + +for i in range(len(cfg['cores'])): + + # Make sure to initialize a dictionary + # that describes accelerators each core + curr_snax_acc_core = 'snax_core_' + str(i) + snax_acc_dict = {} + snax_acc_flag = False + snax_acc_multi_flag = False + snax_acc_wide_only = False + snax_tcdm_config_count = 0 + snax_acc_narrow_wide_mix = False + snax_num_acc = None + snax_num_csr = None + prefix_snax_nonacc_count = 0 + prefix_snax_count = 0 + + # If an accelerator setting exists + # Layout all possible accelerator configurations + # Per snitch cluster core + if ('snax_acc_set' in cfg['cores'][i]): + snax_acc_flag = True + + # This part checks for the TCDM port configuration + if(cfg['cores'][i]['snax_acc_set']['snax_num_acc'] > 1): + snax_acc_multi_flag = True + snax_num_csr = cfg['cores'][i]['snax_acc_set']['snax_num_csr'] + snax_num_acc = cfg['cores'][i]['snax_acc_set']['snax_num_acc'] + snax_tcdm_config_count += 1 + + if (cfg['cores'][i]['snax_acc_set']['snax_connect_tcdm_wide_only']): + snax_acc_wide_only = True + snax_tcdm_config_count += 1 + + if (cfg['cores'][i]['snax_acc_set']['snax_connect_narrow_wide_mix']): + snax_acc_narrow_wide_mix = True + snax_tcdm_config_count += 1 + + # Assertion check just to make sure + # No one makes a mistake in setting the configurations + assert (snax_tcdm_config_count <= 1), "Error! Can only have 1 tcdm configuration. Multi port, wide only, or narrow-wide mix. Default is narrow only." 
+ + # Pre-saving ports + snax_tcdm_ports = cfg['cores'][i]['snax_acc_set']['snax_tcdm_ports'] + snax_narrow_tcdm_start_idx = cfg['cores'][i]['snax_acc_set']['snax_narrow_tcdm_start_idx'] + snax_narrow_tcdm_end_idx = cfg['cores'][i]['snax_acc_set']['snax_narrow_tcdm_end_idx'] + snax_wide_tcdm_start_idx = cfg['cores'][i]['snax_acc_set']['snax_wide_tcdm_start_idx'] + snax_wide_tcdm_end_idx = cfg['cores'][i]['snax_acc_set']['snax_wide_tcdm_end_idx'] + + narrow_start_idx_list.append(snax_narrow_tcdm_start_idx) + narrow_end_idx_list.append(snax_narrow_tcdm_end_idx) + wide_start_idx_list.append(snax_wide_tcdm_start_idx) + wide_end_idx_list.append(snax_wide_tcdm_end_idx) + + # Cycle through each accelerator setting per Snitch core + for j in range(cfg['cores'][i]['snax_acc_set']['snax_num_acc']): + + # Check for narrow and wide TCDM ports + if(snax_acc_narrow_wide_mix): + total_narrow_mix = snax_narrow_tcdm_end_idx - snax_narrow_tcdm_start_idx + 1 + total_wide_mix = snax_wide_tcdm_end_idx - snax_wide_tcdm_start_idx + 1 + total_mix = total_narrow_mix + total_wide_mix + assert (snax_tcdm_ports == total_mix), "Error! Total TCDM ports should match the index mix." 
+ + + # Prepare accelerator tags + curr_snax_acc = '' + curr_snax_acc = "i_snax_core_" + str(i) + "_acc_" + str(prefix_snax_count) + + # Set tcdm offset ports + tcdm_offset_stop += cfg['cores'][i]['snax_acc_set']['snax_tcdm_ports'] + + # Save settings in the dictionary + snax_acc_dict[curr_snax_acc] = { + 'snax_module': cfg['cores'][i]['snax_acc_set']['snax_module'], + 'snax_tcdm_ports': snax_tcdm_ports, + 'snax_tcdm_offset_start': tcdm_offset_start, + 'snax_tcdm_offset_stop': tcdm_offset_stop, + 'snax_acc_wide_only': snax_acc_wide_only, + 'snax_acc_narrow_wide_mix': snax_acc_narrow_wide_mix, + 'snax_narrow_tcdm_start_idx': snax_narrow_tcdm_start_idx, + 'snax_narrow_tcdm_end_idx': snax_narrow_tcdm_end_idx, + 'snax_wide_tcdm_start_idx': snax_wide_tcdm_start_idx, + 'snax_wide_tcdm_end_idx': snax_wide_tcdm_end_idx, + } + + # This one pre-computes for the multi-accelerator connection + tcdm_offset_start += cfg['cores'][i]['snax_acc_set']['snax_tcdm_ports'] + prefix_snax_count += 1 + total_snax_tcdm_ports += cfg['cores'][i]['snax_acc_set']['snax_tcdm_ports'] + + else: + + # Consider cases without accelerators + # Just leave them as none + curr_snax_acc = "i_snax_core_" + str(i) + "_noacc_" + str(prefix_snax_nonacc_count) + snax_acc_dict[curr_snax_acc] = None + + narrow_start_idx_list.append(0) + narrow_end_idx_list.append(0) + wide_start_idx_list.append(0) + wide_end_idx_list.append(0) + + # This is the packed configuration + snax_core_acc[curr_snax_acc_core] = { + 'snax_acc_flag': snax_acc_flag, + 'snax_acc_multi_flag':snax_acc_multi_flag, + 'snax_num_csr': snax_num_csr, + 'snax_num_acc': snax_num_acc, + 'snax_acc_dict':snax_acc_dict + } +### for i in snax_core_acc: +### print(i) +### for j in snax_core_acc[i]['snax_acc_dict']: +### print(snax_core_acc[i]['snax_acc_dict'][j]) +### ${snax_core_acc['snax_core_0']['snax_acc_dict']['i_snax_core_0_acc_0']['snax_module']} +%>\ <%def name="icache_cfg(prop)"> % for lw in cfg['hives']: ${lw['icache'][prop]}${',' if not 
loop.last else ''} % endfor - - +\ <%def name="core_cfg(prop)">\ % for c in cfg['cores']: ${c[prop]}${', ' if not loop.last else ''}\ % endfor \ - +<%def name="acc_cfg(prop)">\ + % for a in prop: +${a}${', ' if not loop.last else ''}\ + % endfor +\ <%def name="core_cfg_flat(prop)">\ ${cfg['nr_cores']}'b\ % for c in cfg['cores'][::-1]: ${int(c[prop])}\ % endfor \ - <%def name="core_isa(isa)">\ ${cfg['nr_cores']}'b\ % for c in cfg['cores'][::-1]: ${int(getattr(c['isa_parsed'], isa))}\ % endfor \ - <%def name="ssr_cfg(core, ssr_fmt_str, none_str, inner_sep)">\ % for core in cfg['cores']: % for s in list(reversed(core['ssrs'] + [None]*(cfg['num_ssrs_max']-len(core['ssrs'])))): @@ -40,6 +169,9 @@ ${(" '{" if loop.first else ' ') + \ ${',' if not loop.last else ''} % endfor \ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 // These includes are necessary for pre-defined typedefs `include "axi/typedef.svh" @@ -297,6 +429,10 @@ module ${cfg['name']}_wrapper ( localparam int unsigned NumSequencerInstr [${cfg['nr_cores']}] = '{${core_cfg('num_sequencer_instructions')}}; localparam int unsigned NumSsrs [${cfg['nr_cores']}] = '{${core_cfg('num_ssrs')}}; localparam int unsigned SsrMuxRespDepth [${cfg['nr_cores']}] = '{${core_cfg('ssr_mux_resp_depth')}}; + localparam int unsigned SnaxNarrowStartIdx [${cfg['nr_cores']}] = '{${acc_cfg(narrow_start_idx_list)}}; + localparam int unsigned SnaxNarrowEndIdx [${cfg['nr_cores']}] = '{${acc_cfg(narrow_end_idx_list)}}; + localparam int unsigned SnaxWideStartIdx [${cfg['nr_cores']}] = '{${acc_cfg(wide_start_idx_list)}}; + localparam int unsigned SnaxWideEndIdx [${cfg['nr_cores']}] = '{${acc_cfg(wide_end_idx_list)}}; // SNAX accelerator ports per core ${cfg['pkg_name']}::acc_req_t [${cfg['pkg_name']}::NrCores-1:0] snax_req; @@ -306,76 +442,7 @@ module ${cfg['name']}_wrapper ( logic [${cfg['pkg_name']}::NrCores-1:0] 
snax_pvalid; logic [${cfg['pkg_name']}::NrCores-1:0] snax_pready; logic [${cfg['pkg_name']}::NrCores-1:0] snax_barrier; -<% -# Just some working variables -tcdm_offset_start = 0 -tcdm_offset_stop = -1 -total_snax_tcdm_ports = 0 -snax_core_acc = {} - -# Cycle through each core -# and check if an accelerator setting exists - -for i in range(len(cfg['cores'])): - - # Make sure to initialize a dictionary - # that describes accelerators each core - curr_snax_acc_core = 'snax_core_' + str(i) - snax_acc_dict = {} - snax_acc_flag = False - snax_acc_multi_flag = False - snax_num_acc = None - snax_num_csr = None - prefix_snax_nonacc_count = 0 - prefix_snax_count = 0 - - # If an accelerator setting exists - # Layout all possible accelerator configurations - # Per snitch cluster core - if ('snax_acc_set' in cfg['cores'][i]): - snax_acc_flag = True - - if(cfg['cores'][i]['snax_acc_set']['snax_num_acc'] > 1): - snax_acc_multi_flag = True - snax_num_csr = cfg['cores'][i]['snax_acc_set']['snax_num_csr'] - snax_num_acc = cfg['cores'][i]['snax_acc_set']['snax_num_acc'] - - for j in range(cfg['cores'][i]['snax_acc_set']['snax_num_acc']): - - # Prepare accelerator tags - curr_snax_acc = '' - curr_snax_acc = "i_snax_core_" + str(i) + "_acc_" + str(prefix_snax_count) - # Set tcdm offset ports - tcdm_offset_stop += cfg['cores'][i]['snax_acc_set']['snax_tcdm_ports'] - - # Save settings in the dictionary - snax_acc_dict[curr_snax_acc] = { - 'snax_module': cfg['cores'][i]['snax_acc_set']['snax_module'], - 'snax_tcdm_ports': cfg['cores'][i]['snax_acc_set']['snax_tcdm_ports'], - 'snax_tcdm_offset_start': tcdm_offset_start, - 'snax_tcdm_offset_stop': tcdm_offset_stop - } - tcdm_offset_start += cfg['cores'][i]['snax_acc_set']['snax_tcdm_ports'] - prefix_snax_count += 1 - total_snax_tcdm_ports += cfg['cores'][i]['snax_acc_set']['snax_tcdm_ports'] - - else: - - # Consider cases without accelerators - # Just leave them as none - curr_snax_acc = "i_snax_core_" + str(i) + "_noacc_" + 
str(prefix_snax_nonacc_count) - snax_acc_dict[curr_snax_acc] = None - - # This is the packed configuration - snax_core_acc[curr_snax_acc_core] = { - 'snax_acc_flag': snax_acc_flag, - 'snax_acc_multi_flag':snax_acc_multi_flag, - 'snax_num_csr': snax_num_csr, - 'snax_num_acc': snax_num_acc, - 'snax_acc_dict':snax_acc_dict - } -%> // SNAX TCDM wires // Wires need to be declared before use ${cfg['pkg_name']}::tcdm_req_t [${total_snax_tcdm_ports-1}:0] snax_tcdm_req; @@ -425,6 +492,10 @@ for i in range(len(cfg['cores'])): .Xssr (${core_cfg_flat('xssr')}), .Xfrep (${core_cfg_flat('xfrep')}), .TotalSnaxTcdmPorts(${total_snax_tcdm_ports}), + .SnaxNarrowStartIdx ( SnaxNarrowStartIdx ), + .SnaxNarrowEndIdx ( SnaxNarrowEndIdx ), + .SnaxWideStartIdx ( SnaxWideStartIdx ), + .SnaxWideEndIdx ( SnaxWideEndIdx ), .ConnectSnaxAccWide(${core_cfg_flat('snax_acc_wide')}), .FPUImplementation (${cfg['pkg_name']}::FPUImplementation), .SnitchPMACfg (${cfg['pkg_name']}::SnitchPMACfg), @@ -593,8 +664,9 @@ for i in range(len(cfg['cores'])): ); %endfor % else: - % for jdx, jdx_key in enumerate(snax_core_acc[idx_key]['snax_acc_dict']): + // One core controlling one accelerator + % for jdx, jdx_key in enumerate(snax_core_acc[idx_key]['snax_acc_dict']): ${snax_core_acc[idx_key]['snax_acc_dict'][jdx_key]['snax_module']} # ( .DataWidth ( ${cfg['pkg_name']}::NarrowDataWidth ), .SnaxTcdmPorts ( ${snax_core_acc[idx_key]['snax_acc_dict'][jdx_key]['snax_tcdm_ports']} ), From 388e111c0e6f2b3de07cf95b532be22b05703715 Mon Sep 17 00:00:00 2001 From: rgantonio Date: Tue, 2 Apr 2024 12:38:30 +0200 Subject: [PATCH 4/9] cfg: Test configuration --- target/snitch_cluster/cfg/snax-test.hjson | 131 ++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 target/snitch_cluster/cfg/snax-test.hjson diff --git a/target/snitch_cluster/cfg/snax-test.hjson b/target/snitch_cluster/cfg/snax-test.hjson new file mode 100644 index 000000000..bab58f3c8 --- /dev/null +++ 
b/target/snitch_cluster/cfg/snax-test.hjson @@ -0,0 +1,131 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// Cluster configuration for a simple testbench system. +{ + nr_s1_quadrant: 1, + s1_quadrant: { + nr_clusters: 1, + }, + + cluster: { + boot_addr: 4096, // 0x1000 + cluster_base_addr: 268435456, // 0x1000_0000 + cluster_base_offset: 0, // 0x0 + cluster_base_hartid: 0, + addr_width: 48, + data_width: 64, + tcdm: { + size: 128, + banks: 32, + }, + cluster_periph_size: 64, // kB + zero_mem_size: 64, // kB + dma_data_width: 512, + dma_axi_req_fifo_depth: 3, + dma_req_fifo_depth: 3, + // Timing parameters + timing: { + lat_comp_fp32: 3, + lat_comp_fp64: 3, + lat_comp_fp16: 2, + lat_comp_fp16_alt: 2, + lat_comp_fp8: 1, + lat_comp_fp8_alt: 1, + lat_noncomp: 1, + lat_conv: 1, + lat_sdotp: 2, + fpu_pipe_config: "BEFORE" + narrow_xbar_latency: "CUT_ALL_PORTS", + wide_xbar_latency: "CUT_ALL_PORTS", + // Isolate the core. + register_core_req: true, + register_core_rsp: true, + register_offload_req: true, + register_offload_rsp: true + }, + hives: [ + // Hive 0 + { + icache: { + size: 8, // total instruction cache size in kByte + sets: 2, // number of ways + cacheline: 256 // word size in bits + }, + cores: [ + { $ref: "#/snax_mac_core_template" }, + { $ref: "#/dma_core_template" }, + ] + } + ] + }, + dram: { + // 0x8000_0000 + address: 2147483648, + // 0x8000_0000 + length: 2147483648 + }, + peripherals: { + clint: { + // 0xffff_0000 + address: 4294901760, + // 0x0000_1000 + length: 4096 + }, + }, + // Templates. 
+ snax_mac_core_template: { + isa: "rv32imafd", + xssr: true, + xfrep: true, + xdma: false, + xf16: true, + xf16alt: true, + xf8: true, + xf8alt: true, + xfdotp: true, + xfvec: true, + snax_acc_set: { + snax_module: "snax_gemm_wrapper", + snax_tcdm_ports: 24, + snax_num_acc: 1, + snax_connect_tcdm_wide_only: false, + snax_connect_narrow_wide_mix: true, + snax_narrow_tcdm_start_idx: 0, + snax_narrow_tcdm_end_idx: 7, + snax_wide_tcdm_start_idx: 8, + snax_wide_tcdm_end_idx: 23, + }, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1, + // Enable division/square root unit + // Xdiv_sqrt: true, + }, + dma_core_template: { + isa: "rv32imafd", + // Xdiv_sqrt: true, + # isa: "rv32ema", + xdma: true + xssr: false + xfrep: false + xf16: false, + xf16alt: false, + xf8: false, + xf8alt: false, + xfdotp: false, + xfvec: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1, + } +} From 9dc2e9d4917a8f8811c5712b45d73665e335223a Mon Sep 17 00:00:00 2001 From: xiaoling-yi Date: Tue, 2 Apr 2024 15:08:25 +0200 Subject: [PATCH 5/9] add narrow and wide switch --- hw/snitch_cluster/src/snitch_cluster.sv | 263 +++++++++++++----- .../src/snitch_cluster_wrapper.sv.tpl | 49 +++- target/snitch_cluster/cfg/snax-test.hjson | 72 ++++- 3 files changed, 292 insertions(+), 92 deletions(-) diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv index a7fb7f185..e273c8049 100644 --- a/hw/snitch_cluster/src/snitch_cluster.sv +++ b/hw/snitch_cluster/src/snitch_cluster.sv @@ -97,9 +97,11 @@ module snitch_cluster parameter fpnew_pkg::fpu_implementation_t FPUImplementation [NrCores] = '{default: fpnew_pkg::fpu_implementation_t'(0)}, /// Total Number of SNAX TCDM ports 
- parameter int unsigned SnaxNarrowForWidePorts = 0, + parameter bit SnaxWideOnly = 0, + parameter bit SnaxNarrowAndWide = 0, + parameter int unsigned SnaxWidePorts = 0, parameter int unsigned SnaxNarrowPorts = 0, - parameter int unsigned TotalSnaxTcdmPorts = SnaxNarrowForWidePorts + SnaxNarrowPorts, + parameter int unsigned TotalSnaxTcdmPorts = SnaxWidePorts + SnaxNarrowPorts, parameter int unsigned SnaxNarrowStartIdx [NrCores] = '{default: 0}, parameter int unsigned SnaxNarrowEndIdx [NrCores] = '{default: 0}, parameter int unsigned SnaxWideStartIdx [NrCores] = '{default: 0}, @@ -657,87 +659,196 @@ module snitch_cluster // Use this ports for the total number and needs to be cute into multiple versions // It needs to be divided by 8 because each narrow TCDM port is 64 bits wide - localparam int unsigned NumSnaxWideTcdmPorts = TotalSnaxTcdmPorts/8; - if (ConnectSnaxAccWide != 0) begin: gen_yes_wide_acc_connect + localparam int unsigned NumSnaxWideTcdmPorts = SnaxWidePorts/8; - // First declare the wide SNAX tcdm ports - tcdm_dma_req_t [NumSnaxWideTcdmPorts-1:0] snax_wide_req; - tcdm_dma_rsp_t [NumSnaxWideTcdmPorts-1:0] snax_wide_rsp; + tcdm_dma_req_t [NumSnaxWideTcdmPorts-1:0] snax_wide_req; + tcdm_dma_rsp_t [NumSnaxWideTcdmPorts-1:0] snax_wide_rsp; + + tcdm_req_t [SnaxNarrowPorts-1:0] snax_narrow_req; + tcdm_rsp_t [SnaxNarrowPorts-1:0] snax_narrow_rsp; + + if(SnaxWideOnly)begin + assign snax_narrow_req = '0; // This is for hard remapping of signals // !!! 
Note that System verilog does not support // Part-select method for unpacked signals always_comb begin for (int i = 0; i < NumSnaxWideTcdmPorts; i++) begin - // Request ports - snax_wide_req[i].q.addr = snax_tcdm_req_i[i*8].q.addr ; - snax_wide_req[i].q.write = snax_tcdm_req_i[i*8].q.write; - snax_wide_req[i].q.amo = reqrsp_pkg::AMONone; - snax_wide_req[i].q.data = { - snax_tcdm_req_i[i*8+7].q.data, - snax_tcdm_req_i[i*8+6].q.data, - snax_tcdm_req_i[i*8+5].q.data, - snax_tcdm_req_i[i*8+4].q.data, - snax_tcdm_req_i[i*8+3].q.data, - snax_tcdm_req_i[i*8+2].q.data, - snax_tcdm_req_i[i*8+1].q.data, - snax_tcdm_req_i[i*8].q.data - }; - snax_wide_req[i].q.strb = { - snax_tcdm_req_i[i*8+7].q.strb, - snax_tcdm_req_i[i*8+6].q.strb, - snax_tcdm_req_i[i*8+5].q.strb, - snax_tcdm_req_i[i*8+4].q.strb, - snax_tcdm_req_i[i*8+3].q.strb, - snax_tcdm_req_i[i*8+2].q.strb, - snax_tcdm_req_i[i*8+1].q.strb, - snax_tcdm_req_i[i*8].q.strb - }; - snax_wide_req[i].q.user = '0; - snax_wide_req[i].q_valid = &{ - snax_tcdm_req_i[i*8+7].q_valid, - snax_tcdm_req_i[i*8+6].q_valid, - snax_tcdm_req_i[i*8+5].q_valid, - snax_tcdm_req_i[i*8+4].q_valid, - snax_tcdm_req_i[i*8+3].q_valid, - snax_tcdm_req_i[i*8+2].q_valid, - snax_tcdm_req_i[i*8+1].q_valid, - snax_tcdm_req_i[i*8].q_valid - }; - - // Response ports - { - snax_tcdm_rsp_o[i*8+7].p.data, - snax_tcdm_rsp_o[i*8+6].p.data, - snax_tcdm_rsp_o[i*8+5].p.data, - snax_tcdm_rsp_o[i*8+4].p.data, - snax_tcdm_rsp_o[i*8+3].p.data, - snax_tcdm_rsp_o[i*8+2].p.data, - snax_tcdm_rsp_o[i*8+1].p.data, - snax_tcdm_rsp_o[i*8].p.data - } = snax_wide_rsp[i].p.data; - - snax_tcdm_rsp_o[i*8+7].p_valid = snax_wide_rsp[i].p_valid; - snax_tcdm_rsp_o[i*8+6].p_valid = snax_wide_rsp[i].p_valid; - snax_tcdm_rsp_o[i*8+5].p_valid = snax_wide_rsp[i].p_valid; - snax_tcdm_rsp_o[i*8+4].p_valid = snax_wide_rsp[i].p_valid; - snax_tcdm_rsp_o[i*8+3].p_valid = snax_wide_rsp[i].p_valid; - snax_tcdm_rsp_o[i*8+2].p_valid = snax_wide_rsp[i].p_valid; - snax_tcdm_rsp_o[i*8+1].p_valid = 
snax_wide_rsp[i].p_valid; - snax_tcdm_rsp_o[i*8].p_valid = snax_wide_rsp[i].p_valid; - - snax_tcdm_rsp_o[i*8+7].q_ready = snax_wide_rsp[i].q_ready; - snax_tcdm_rsp_o[i*8+6].q_ready = snax_wide_rsp[i].q_ready; - snax_tcdm_rsp_o[i*8+5].q_ready = snax_wide_rsp[i].q_ready; - snax_tcdm_rsp_o[i*8+4].q_ready = snax_wide_rsp[i].q_ready; - snax_tcdm_rsp_o[i*8+3].q_ready = snax_wide_rsp[i].q_ready; - snax_tcdm_rsp_o[i*8+2].q_ready = snax_wide_rsp[i].q_ready; - snax_tcdm_rsp_o[i*8+1].q_ready = snax_wide_rsp[i].q_ready; - snax_tcdm_rsp_o[i*8].q_ready = snax_wide_rsp[i].q_ready; - end + // Request ports + snax_wide_req[i].q.addr = snax_tcdm_req_i[i*8].q.addr ; + snax_wide_req[i].q.write = snax_tcdm_req_i[i*8].q.write; + snax_wide_req[i].q.amo = reqrsp_pkg::AMONone; + snax_wide_req[i].q.data = { + snax_tcdm_req_i[i*8+7].q.data, + snax_tcdm_req_i[i*8+6].q.data, + snax_tcdm_req_i[i*8+5].q.data, + snax_tcdm_req_i[i*8+4].q.data, + snax_tcdm_req_i[i*8+3].q.data, + snax_tcdm_req_i[i*8+2].q.data, + snax_tcdm_req_i[i*8+1].q.data, + snax_tcdm_req_i[i*8].q.data + }; + snax_wide_req[i].q.strb = { + snax_tcdm_req_i[i*8+7].q.strb, + snax_tcdm_req_i[i*8+6].q.strb, + snax_tcdm_req_i[i*8+5].q.strb, + snax_tcdm_req_i[i*8+4].q.strb, + snax_tcdm_req_i[i*8+3].q.strb, + snax_tcdm_req_i[i*8+2].q.strb, + snax_tcdm_req_i[i*8+1].q.strb, + snax_tcdm_req_i[i*8].q.strb + }; + snax_wide_req[i].q.user = '0; + snax_wide_req[i].q_valid = &{ + snax_tcdm_req_i[i*8+7].q_valid, + snax_tcdm_req_i[i*8+6].q_valid, + snax_tcdm_req_i[i*8+5].q_valid, + snax_tcdm_req_i[i*8+4].q_valid, + snax_tcdm_req_i[i*8+3].q_valid, + snax_tcdm_req_i[i*8+2].q_valid, + snax_tcdm_req_i[i*8+1].q_valid, + snax_tcdm_req_i[i*8].q_valid + }; + + // Response ports + { + snax_tcdm_rsp_o[i*8+7].p.data, + snax_tcdm_rsp_o[i*8+6].p.data, + snax_tcdm_rsp_o[i*8+5].p.data, + snax_tcdm_rsp_o[i*8+4].p.data, + snax_tcdm_rsp_o[i*8+3].p.data, + snax_tcdm_rsp_o[i*8+2].p.data, + snax_tcdm_rsp_o[i*8+1].p.data, + snax_tcdm_rsp_o[i*8].p.data + } = 
snax_wide_rsp[i].p.data; + + snax_tcdm_rsp_o[i*8+7].p_valid = snax_wide_rsp[i].p_valid; + snax_tcdm_rsp_o[i*8+6].p_valid = snax_wide_rsp[i].p_valid; + snax_tcdm_rsp_o[i*8+5].p_valid = snax_wide_rsp[i].p_valid; + snax_tcdm_rsp_o[i*8+4].p_valid = snax_wide_rsp[i].p_valid; + snax_tcdm_rsp_o[i*8+3].p_valid = snax_wide_rsp[i].p_valid; + snax_tcdm_rsp_o[i*8+2].p_valid = snax_wide_rsp[i].p_valid; + snax_tcdm_rsp_o[i*8+1].p_valid = snax_wide_rsp[i].p_valid; + snax_tcdm_rsp_o[i*8].p_valid = snax_wide_rsp[i].p_valid; + + snax_tcdm_rsp_o[i*8+7].q_ready = snax_wide_rsp[i].q_ready; + snax_tcdm_rsp_o[i*8+6].q_ready = snax_wide_rsp[i].q_ready; + snax_tcdm_rsp_o[i*8+5].q_ready = snax_wide_rsp[i].q_ready; + snax_tcdm_rsp_o[i*8+4].q_ready = snax_wide_rsp[i].q_ready; + snax_tcdm_rsp_o[i*8+3].q_ready = snax_wide_rsp[i].q_ready; + snax_tcdm_rsp_o[i*8+2].q_ready = snax_wide_rsp[i].q_ready; + snax_tcdm_rsp_o[i*8+1].q_ready = snax_wide_rsp[i].q_ready; + snax_tcdm_rsp_o[i*8].q_ready = snax_wide_rsp[i].q_ready; + end end + end else if (SnaxNarrowAndWide) begin + always_comb begin + // Running base indices into snax_wide_req / snax_narrow_req; they must persist across cores + automatic int wide_port_idx = 0; + automatic int narrow_port_idx = 0; + for(int i = 0; i < NrCores; i++) begin + if(SnaxWideEndIdx[i] - SnaxWideStartIdx[i] > 0) begin + automatic int NumSnaxWideTcdmPortCurrentCore = (SnaxWideEndIdx[i] - SnaxWideStartIdx[i] + 1)/8; // end index is inclusive + for(int j = 0; j < NumSnaxWideTcdmPortCurrentCore; j++) begin + // Request ports + snax_wide_req[j + wide_port_idx].q.addr = snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.addr ; + snax_wide_req[j + wide_port_idx].q.write = snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.write; + snax_wide_req[j + wide_port_idx].q.amo = reqrsp_pkg::AMONone; + snax_wide_req[j + wide_port_idx].q.data = { + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+7].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+6].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+5].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+4].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+3].q.data, + 
snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+2].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+1].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.data + }; + snax_wide_req[j + wide_port_idx].q.strb = { + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+7].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+6].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+5].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+4].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+3].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+2].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+1].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.strb + }; + snax_wide_req[j + wide_port_idx].q.user = '0; + snax_wide_req[j + wide_port_idx].q_valid = &{ + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+7].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+6].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+5].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+4].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+3].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+2].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+1].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q_valid + }; + + // Response ports + { + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+7].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+6].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+5].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+4].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+3].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+2].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+1].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8].p.data + } = snax_wide_rsp[j + wide_port_idx].p.data; + + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+7].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+6].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+5].p_valid = snax_wide_rsp[j + 
wide_port_idx].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+4].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+3].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+2].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+1].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; + + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+7].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+6].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+5].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+4].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+3].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+2].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+1].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; + end + wide_port_idx += NumSnaxWideTcdmPortCurrentCore; // advance once per core, after all of its wide ports + end + if (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i] > 0) begin + automatic int NumSnaxNarrowTcdmPortCurrentCore = (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i] + 1); // end index is inclusive + for (int j = 0; j < NumSnaxNarrowTcdmPortCurrentCore; j++) begin + // Request ports + snax_narrow_req[j + narrow_port_idx].q.addr = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.addr ; + snax_narrow_req[j + narrow_port_idx].q.write = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.write; + snax_narrow_req[j + narrow_port_idx].q.amo = reqrsp_pkg::AMONone; + snax_narrow_req[j + narrow_port_idx].q.data = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.data; + snax_narrow_req[j 
+ narrow_port_idx].q.strb = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.strb; + snax_narrow_req[j + narrow_port_idx].q.user = '0; + snax_narrow_req[j + narrow_port_idx].q_valid = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q_valid; + + // Response ports + snax_tcdm_rsp_o[SnaxNarrowStartIdx[i] + j].p.data = snax_narrow_rsp[j + narrow_port_idx].p.data; + snax_tcdm_rsp_o[SnaxNarrowStartIdx[i] + j].p_valid = snax_narrow_rsp[j + narrow_port_idx].p_valid; + snax_tcdm_rsp_o[SnaxNarrowStartIdx[i] + j].q_ready = snax_narrow_rsp[j + narrow_port_idx].q_ready; + end + narrow_port_idx += NumSnaxNarrowTcdmPortCurrentCore; // advance once per core, after all of its narrow ports + end + end + end + end else begin + assign snax_narrow_req = snax_tcdm_req_i; + assign snax_tcdm_rsp_o = snax_narrow_rsp; + + assign snax_wide_req = '0; + end + if (NumSnaxWideTcdmPorts > 0) begin: gen_yes_wide_acc_connect snitch_tcdm_interconnect #( .NumInp (1 + NumSnaxWideTcdmPorts), .NumOut (NrSuperBanks), @@ -879,7 +990,7 @@ module snitch_cluster // generate TCDM for snax if any of the cores has SNAX enabled // Make ConnectSnaxAccWide a switcher for now that all accelerators connect to wide // if this happens - if( (TotalSnaxTcdmPorts > 0) && !(|ConnectSnaxAccWide)) begin: gen_yes_snax_tcdm_interconnect + if( SnaxNarrowPorts > 0) begin: gen_yes_snax_tcdm_interconnect snitch_tcdm_interconnect #( - .NumInp (NumTCDMIn + TotalSnaxTcdmPorts), + .NumInp (NumTCDMIn + SnaxNarrowPorts), @@ -897,8 +1008,8 @@ module snitch_cluster ) i_tcdm_interconnect ( .clk_i, .rst_ni, - .req_i ({axi_soc_req, tcdm_req, snax_tcdm_req_i}), - .rsp_o ({axi_soc_rsp, tcdm_rsp, snax_tcdm_rsp_o}), + .req_i ({axi_soc_req, tcdm_req, snax_narrow_req}), + .rsp_o ({axi_soc_rsp, tcdm_rsp, snax_narrow_rsp}), .mem_req_o (ic_req), .mem_rsp_i (ic_rsp) ); diff --git a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl index 1aa83ed1b..959c48392 100644 --- a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl +++ b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl @@ -11,6 +11,12 @@ 
narrow_start_idx_list = [] narrow_end_idx_list = [] wide_start_idx_list = [] wide_end_idx_list = [] +idx_offset = 0 +SnaxWideOnly = 0 +SnaxWideOnlyNum = 0 +SnaxNarrowAndWide = 0 +SnaxWidePorts = 0 +SnaxNarrowPorts = 0 # Cycle through each core # and check if an accelerator setting exists @@ -47,11 +53,13 @@ for i in range(len(cfg['cores'])): if (cfg['cores'][i]['snax_acc_set']['snax_connect_tcdm_wide_only']): snax_acc_wide_only = True snax_tcdm_config_count += 1 + SnaxWideOnlyNum = SnaxWideOnlyNum + 1 if (cfg['cores'][i]['snax_acc_set']['snax_connect_narrow_wide_mix']): snax_acc_narrow_wide_mix = True snax_tcdm_config_count += 1 - + SnaxNarrowAndWide = SnaxNarrowAndWide + 1 + # Assertion check just to make sure # No one makes a mistake in setting the configurations assert (snax_tcdm_config_count <= 1), "Error! Can only have 1 tcdm configuration. Multi port, wide only, or narrow-wide mix. Default is narrow only." @@ -63,11 +71,30 @@ for i in range(len(cfg['cores'])): snax_wide_tcdm_start_idx = cfg['cores'][i]['snax_acc_set']['snax_wide_tcdm_start_idx'] snax_wide_tcdm_end_idx = cfg['cores'][i]['snax_acc_set']['snax_wide_tcdm_end_idx'] - narrow_start_idx_list.append(snax_narrow_tcdm_start_idx) - narrow_end_idx_list.append(snax_narrow_tcdm_end_idx) - wide_start_idx_list.append(snax_wide_tcdm_start_idx) - wide_end_idx_list.append(snax_wide_tcdm_end_idx) - + if (len(narrow_start_idx_list) == 0): + narrow_start_idx_list.append(snax_narrow_tcdm_start_idx) + narrow_end_idx_list.append(snax_narrow_tcdm_end_idx) + wide_start_idx_list.append(snax_wide_tcdm_start_idx) + wide_end_idx_list.append(snax_wide_tcdm_end_idx) + else: + narrow_start_idx_list.append(snax_narrow_tcdm_start_idx + idx_offset) + narrow_end_idx_list.append(snax_narrow_tcdm_end_idx + idx_offset) + wide_start_idx_list.append(snax_wide_tcdm_start_idx + idx_offset) + wide_end_idx_list.append(snax_wide_tcdm_end_idx + idx_offset) + + SnaxWidePorts = SnaxWidePorts + snax_wide_tcdm_end_idx - 
snax_wide_tcdm_start_idx + 1 # wide count comes from the (inclusive) wide index range + SnaxNarrowPorts = SnaxNarrowPorts + snax_narrow_tcdm_end_idx - snax_narrow_tcdm_start_idx + 1 # narrow count comes from the (inclusive) narrow index range + + idx_offset += snax_tcdm_ports + + if (SnaxWideOnlyNum == len(cfg['cores']) - 1): + SnaxWideOnly = 1 + else: + if (SnaxNarrowAndWide != 0): + SnaxNarrowAndWide = 1 + else: + error("Error! TCDM configuration is not correct. Please check the configuration.") + # Cycle through each accelerator setting per Snitch core for j in range(cfg['cores'][i]['snax_acc_set']['snax_num_acc']): @@ -92,8 +119,10 @@ for i in range(len(cfg['cores'])): 'snax_tcdm_ports': snax_tcdm_ports, 'snax_tcdm_offset_start': tcdm_offset_start, 'snax_tcdm_offset_stop': tcdm_offset_stop, - 'snax_acc_wide_only': snax_acc_wide_only, - 'snax_acc_narrow_wide_mix': snax_acc_narrow_wide_mix, + 'snax_acc_wide_only': SnaxWideOnly, + 'snax_acc_narrow_wide_mix': SnaxNarrowAndWide, + 'SnaxWidePorts': SnaxWidePorts, + 'SnaxNarrowPorts': SnaxNarrowPorts, 'snax_narrow_tcdm_start_idx': snax_narrow_tcdm_start_idx, 'snax_narrow_tcdm_end_idx': snax_narrow_tcdm_end_idx, 'snax_wide_tcdm_start_idx': snax_wide_tcdm_start_idx, @@ -491,6 +520,10 @@ module ${cfg['name']}_wrapper ( .Xdma (${core_cfg_flat('xdma')}), .Xssr (${core_cfg_flat('xssr')}), .Xfrep (${core_cfg_flat('xfrep')}), + .SnaxWideOnly(${SnaxWideOnly}), + .SnaxNarrowAndWide(${SnaxNarrowAndWide}), + .SnaxWidePorts(${SnaxWidePorts}), + .SnaxNarrowPorts(${SnaxNarrowPorts}), .TotalSnaxTcdmPorts(${total_snax_tcdm_ports}), .SnaxNarrowStartIdx ( SnaxNarrowStartIdx ), .SnaxNarrowEndIdx ( SnaxNarrowEndIdx ), diff --git a/target/snitch_cluster/cfg/snax-test.hjson b/target/snitch_cluster/cfg/snax-test.hjson index bab58f3c8..34a84e1d9 100644 --- a/target/snitch_cluster/cfg/snax-test.hjson +++ b/target/snitch_cluster/cfg/snax-test.hjson @@ -54,7 +54,8 @@ cacheline: 256 // word size in bits }, cores: [ - { $ref: "#/snax_mac_core_template" }, + { $ref: "#/snax_streamer_gemm_core_template" }, + { $ref: "#/snax_streamer_simd_core_template" }, { $ref: 
"#/dma_core_template" }, ] } @@ -75,7 +76,7 @@ }, }, // Templates. - snax_mac_core_template: { + snax_streamer_gemm_core_template: { isa: "rv32imafd", xssr: true, xfrep: true, @@ -87,16 +88,71 @@ xfdotp: true, xfvec: true, snax_acc_set: { - snax_module: "snax_gemm_wrapper", - snax_tcdm_ports: 24, + snax_module: "snax_streamer_gemm_wrapper", + snax_tcdm_ports: 48, snax_num_acc: 1, snax_connect_tcdm_wide_only: false, snax_connect_narrow_wide_mix: true, snax_narrow_tcdm_start_idx: 0, - snax_narrow_tcdm_end_idx: 7, - snax_wide_tcdm_start_idx: 8, - snax_wide_tcdm_end_idx: 23, - }, + snax_narrow_tcdm_end_idx: 15, + snax_wide_tcdm_start_idx: 16, + snax_wide_tcdm_end_idx: 47, + } + snax_acc_wide: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1, + // Enable division/square root unit + // Xdiv_sqrt: true, + }, + snax_streamer_simd_core_template: { + isa: "rv32imafd", + xssr: true, + xfrep: true, + xdma: false, + xf16: true, + xf16alt: true, + xf8: true, + xf8alt: true, + xfdotp: true, + xfvec: true, + snax_acc_set: { + snax_module: "snax_streamer_simd_wrapper", + snax_tcdm_ports: 40, + snax_num_acc: 1, + snax_connect_tcdm_wide_only: false, + snax_connect_narrow_wide_mix: true, + snax_narrow_tcdm_start_idx: 32, + snax_narrow_tcdm_end_idx: 39, + snax_wide_tcdm_start_idx: 0, + snax_wide_tcdm_end_idx: 31, + } + snax_acc_wide: false, + num_int_outstanding_loads: 1, + num_int_outstanding_mem: 4, + num_fp_outstanding_loads: 4, + num_fp_outstanding_mem: 4, + num_sequencer_instructions: 16, + num_dtlb_entries: 1, + num_itlb_entries: 1, + // Enable division/square root unit + // Xdiv_sqrt: true, + }, + compute_core_template: { + isa: "rv32imafd", + xssr: true, + xfrep: true, + xdma: false, + xf16: true, + xf16alt: true, + xf8: true, + xf8alt: true, + xfdotp: true, + xfvec: true, num_int_outstanding_loads: 1, 
num_int_outstanding_mem: 4, num_fp_outstanding_loads: 4, From b7cf3e7721708d472880a2d04121773634ae55f4 Mon Sep 17 00:00:00 2001 From: xiaoling-yi Date: Tue, 2 Apr 2024 16:59:49 +0200 Subject: [PATCH 6/9] add n and w --- hw/snitch_cluster/src/snitch_cluster.sv | 13 ++++-- .../src/snitch_cluster_wrapper.sv.tpl | 46 ++++++++----------- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv index e273c8049..d7a4e0069 100644 --- a/hw/snitch_cluster/src/snitch_cluster.sv +++ b/hw/snitch_cluster/src/snitch_cluster.sv @@ -96,6 +96,10 @@ module snitch_cluster /// FPU configuration. parameter fpnew_pkg::fpu_implementation_t FPUImplementation [NrCores] = '{default: fpnew_pkg::fpu_implementation_t'(0)}, + /// SNAX Acc initial narrow TCDM ports + parameter int unsigned SnaxAccNarrowTcdmPorts = 0, + /// SNAX Acc initial wide TCDM ports + parameter int unsigned SnaxAccWideTcdmPorts = 0, /// Total Number of SNAX TCDM ports parameter bit SnaxWideOnly = 0, parameter bit SnaxNarrowAndWide = 0, @@ -750,7 +754,7 @@ module snitch_cluster int wide_port_idx = 0; int narrow_port_idx = 0; if(SnaxWideEndIdx[i] - SnaxWideStartIdx[i] > 0) begin - int NumSnaxWideTcdmPortCurrentCore = (SnaxWideEndIdx[i] - SnaxWideStartIdx[i])/8; + int NumSnaxWideTcdmPortCurrentCore = (SnaxWideEndIdx[i] - SnaxWideStartIdx[i] + 1)/8; for(int j = 0; j < NumSnaxWideTcdmPortCurrentCore; j++) begin // Request ports snax_wide_req[j + wide_port_idx].q.addr = snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.addr ; @@ -820,8 +824,8 @@ module snitch_cluster wide_port_idx += NumSnaxWideTcdmPortCurrentCore; end end - else if (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i] > 0) begin - int NumSnaxNarrowTcdmPortCurrentCore = (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i]); + if (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i] > 0) begin + int NumSnaxNarrowTcdmPortCurrentCore = (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i] + 1); for (int j = 0; j < 
NumSnaxNarrowTcdmPortCurrentCore; j++) begin // Request ports snax_narrow_req[j + narrow_port_idx].q.addr = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.addr ; @@ -993,7 +997,7 @@ module snitch_cluster if( SnaxNarrowPorts > 0) begin: gen_yes_snax_tcdm_interconnect snitch_tcdm_interconnect #( - .NumInp (NumTCDMIn + TotalSnaxTcdmPorts), + .NumInp (NumTCDMIn + SnaxAccNarrowTcdmPorts), .NumOut (NrBanks), .tcdm_req_t (tcdm_req_t), .tcdm_rsp_t (tcdm_rsp_t), @@ -1013,6 +1017,7 @@ module snitch_cluster .mem_req_o (ic_req), .mem_rsp_i (ic_rsp) ); + end else begin: gen_no_snax_tcdm_interconnect snitch_tcdm_interconnect #( diff --git a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl index 959c48392..4a7be3d3a 100644 --- a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl +++ b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl @@ -11,10 +11,12 @@ narrow_start_idx_list = [] narrow_end_idx_list = [] wide_start_idx_list = [] wide_end_idx_list = [] + idx_offset = 0 SnaxWideOnly = 0 SnaxWideOnlyNum = 0 SnaxNarrowAndWide = 0 +SnaxNarrowAndWideNum = 0 SnaxWidePorts = 0 SnaxNarrowPorts = 0 @@ -53,13 +55,13 @@ for i in range(len(cfg['cores'])): if (cfg['cores'][i]['snax_acc_set']['snax_connect_tcdm_wide_only']): snax_acc_wide_only = True snax_tcdm_config_count += 1 - SnaxWideOnlyNum = SnaxWideOnlyNum + 1 + SnaxWideOnlyNum += 1 if (cfg['cores'][i]['snax_acc_set']['snax_connect_narrow_wide_mix']): snax_acc_narrow_wide_mix = True snax_tcdm_config_count += 1 - SnaxNarrowAndWide = SnaxNarrowAndWide + 1 - + SnaxNarrowAndWideNum += 1 + # Assertion check just to make sure # No one makes a mistake in setting the configurations assert (snax_tcdm_config_count <= 1), "Error! Can only have 1 tcdm configuration. Multi port, wide only, or narrow-wide mix. Default is narrow only." 
@@ -71,30 +73,20 @@ for i in range(len(cfg['cores'])): snax_wide_tcdm_start_idx = cfg['cores'][i]['snax_acc_set']['snax_wide_tcdm_start_idx'] snax_wide_tcdm_end_idx = cfg['cores'][i]['snax_acc_set']['snax_wide_tcdm_end_idx'] - if (len(narrow_start_idx_list) == 0): - narrow_start_idx_list.append(snax_narrow_tcdm_start_idx) - narrow_end_idx_list.append(snax_narrow_tcdm_end_idx) - wide_start_idx_list.append(snax_wide_tcdm_start_idx) - wide_end_idx_list.append(snax_wide_tcdm_end_idx) - else: - narrow_start_idx_list.append(snax_narrow_tcdm_start_idx + idx_offset) - narrow_end_idx_list.append(snax_narrow_tcdm_end_idx + idx_offset) - wide_start_idx_list.append(snax_wide_tcdm_start_idx + idx_offset) - wide_end_idx_list.append(snax_wide_tcdm_end_idx + idx_offset) - - SnaxWidePorts = SnaxWidePorts + snax_narrow_tcdm_end_idx - snax_narrow_tcdm_start_idx + 1 - SnaxNarrowPorts = SnaxNarrowPorts + snax_wide_tcdm_end_idx - snax_wide_tcdm_start_idx + 1 - + narrow_start_idx_list.append(snax_narrow_tcdm_start_idx + idx_offset) + narrow_end_idx_list.append(snax_narrow_tcdm_end_idx + idx_offset) + wide_start_idx_list.append(snax_wide_tcdm_start_idx + idx_offset) + wide_end_idx_list.append(snax_wide_tcdm_end_idx + idx_offset) idx_offset += snax_tcdm_ports - if (SnaxWideOnlyNum == len(cfg['cores']) - 1): + if (SnaxWideOnlyNum == len(cfg['cores'])): SnaxWideOnly = 1 - else: - if (SnaxNarrowAndWide != 0): - SnaxNarrowAndWide = 1 - else: - error("Error! TCDM configuration is not correct. 
Please check the configuration.") - + if (SnaxNarrowAndWideNum != 0): + SnaxNarrowAndWide = 1 + + SnaxWidePorts += snax_wide_tcdm_end_idx - snax_wide_tcdm_start_idx + 1 + SnaxNarrowPorts += snax_narrow_tcdm_end_idx - snax_narrow_tcdm_start_idx + 1 + # Cycle through each accelerator setting per Snitch core for j in range(cfg['cores'][i]['snax_acc_set']['snax_num_acc']): @@ -119,10 +111,8 @@ for i in range(len(cfg['cores'])): 'snax_tcdm_ports': snax_tcdm_ports, 'snax_tcdm_offset_start': tcdm_offset_start, 'snax_tcdm_offset_stop': tcdm_offset_stop, - 'snax_acc_wide_only': SnaxWideOnly, - 'snax_acc_narrow_wide_mix': SnaxNarrowAndWide, - 'SnaxWidePorts': SnaxWidePorts, - 'SnaxNarrowPorts': SnaxNarrowPorts, + 'snax_acc_wide_only': snax_acc_wide_only, + 'snax_acc_narrow_wide_mix': snax_acc_narrow_wide_mix, 'snax_narrow_tcdm_start_idx': snax_narrow_tcdm_start_idx, 'snax_narrow_tcdm_end_idx': snax_narrow_tcdm_end_idx, 'snax_wide_tcdm_start_idx': snax_wide_tcdm_start_idx, From cc7820b9a5a075e076a45e5f076442d3191ba480 Mon Sep 17 00:00:00 2001 From: xiaoling-yi Date: Tue, 2 Apr 2024 17:18:11 +0200 Subject: [PATCH 7/9] fix SnaxNarrowPorts --- hw/snitch_cluster/src/snitch_cluster.sv | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv index d7a4e0069..e0fcae128 100644 --- a/hw/snitch_cluster/src/snitch_cluster.sv +++ b/hw/snitch_cluster/src/snitch_cluster.sv @@ -96,10 +96,6 @@ module snitch_cluster /// FPU configuration. 
parameter fpnew_pkg::fpu_implementation_t FPUImplementation [NrCores] = '{default: fpnew_pkg::fpu_implementation_t'(0)}, - /// SNAX Acc initial narrow TCDM ports - parameter int unsigned SnaxAccNarrowTcdmPorts = 0, - /// SNAX Acc initial wide TCDM ports - parameter int unsigned SnaxAccWideTcdmPorts = 0, /// Total Number of SNAX TCDM ports parameter bit SnaxWideOnly = 0, parameter bit SnaxNarrowAndWide = 0, @@ -749,7 +745,6 @@ module snitch_cluster end end else if (SnaxNarrowAndWide) begin always_comb begin - for(int i = 0; i < NrCores; i++) begin int wide_port_idx = 0; int narrow_port_idx = 0; @@ -997,7 +992,7 @@ module snitch_cluster if( SnaxNarrowPorts > 0) begin: gen_yes_snax_tcdm_interconnect snitch_tcdm_interconnect #( - .NumInp (NumTCDMIn + SnaxAccNarrowTcdmPorts), + .NumInp (NumTCDMIn + SnaxNarrowPorts), .NumOut (NrBanks), .tcdm_req_t (tcdm_req_t), .tcdm_rsp_t (tcdm_rsp_t), From 667ba607803aaa0aa1d95dd6a6bed92d92f627a4 Mon Sep 17 00:00:00 2001 From: xiaoling-yi Date: Wed, 3 Apr 2024 11:16:12 +0200 Subject: [PATCH 8/9] all narrow and all wide ok and mixed gemm ok --- hw/snitch_cluster/src/snitch_cluster.sv | 203 +++++++++--------- .../src/snitch_cluster_wrapper.sv.tpl | 8 +- 2 files changed, 112 insertions(+), 99 deletions(-) diff --git a/hw/snitch_cluster/src/snitch_cluster.sv b/hw/snitch_cluster/src/snitch_cluster.sv index e0fcae128..a85d3e552 100644 --- a/hw/snitch_cluster/src/snitch_cluster.sv +++ b/hw/snitch_cluster/src/snitch_cluster.sv @@ -660,7 +660,7 @@ module snitch_cluster // Use this ports for the total number and needs to be cute into multiple versions // It needs to be divided by 8 because each narrow TCDM port is 64 bits wide - localparam int unsigned NumSnaxWideTcdmPorts = SnaxWidePorts/8; + localparam int unsigned NumSnaxWideTcdmPorts = SnaxWidePorts / 8; tcdm_dma_req_t [NumSnaxWideTcdmPorts-1:0] snax_wide_req; tcdm_dma_rsp_t [NumSnaxWideTcdmPorts-1:0] snax_wide_rsp; @@ -668,7 +668,7 @@ module snitch_cluster tcdm_req_t [SnaxNarrowPorts-1:0] 
snax_narrow_req; tcdm_rsp_t [SnaxNarrowPorts-1:0] snax_narrow_rsp; - if(SnaxWideOnly)begin + if(SnaxWideOnly) begin: gen_wide_tcdm_only assign snax_narrow_req = '0; // This is for hard remapping of signals @@ -743,104 +743,115 @@ module snitch_cluster snax_tcdm_rsp_o[i*8].q_ready = snax_wide_rsp[i].q_ready; end end - end else if (SnaxNarrowAndWide) begin + end else if (SnaxNarrowAndWide) begin: gen_wide_narrow_mixed_tcdm + int narrow_port_idx [NrCores]; + int wide_port_idx [NrCores]; + int NumSnaxNarrowTcdmPortCurrentCore [NrCores-1]; + int NumSnaxWideTcdmPortCurrentCore [NrCores-1]; always_comb begin - for(int i = 0; i < NrCores; i++) begin - int wide_port_idx = 0; - int narrow_port_idx = 0; - if(SnaxWideEndIdx[i] - SnaxWideStartIdx[i] > 0) begin - int NumSnaxWideTcdmPortCurrentCore = (SnaxWideEndIdx[i] - SnaxWideStartIdx[i] + 1)/8; - for(int j = 0; j < NumSnaxWideTcdmPortCurrentCore; j++) begin - // Request ports - snax_wide_req[j + wide_port_idx].q.addr = snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.addr ; - snax_wide_req[j + wide_port_idx].q.write = snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.write; - snax_wide_req[j + wide_port_idx].q.amo = reqrsp_pkg::AMONone; - snax_wide_req[j + wide_port_idx].q.data = { - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+7].q.data, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+6].q.data, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+5].q.data, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+4].q.data, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+3].q.data, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+2].q.data, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+1].q.data, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.data - }; - snax_wide_req[j + wide_port_idx].q.strb = { - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+7].q.strb, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+6].q.strb, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+5].q.strb, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+4].q.strb, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+3].q.strb, - 
snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+2].q.strb, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+1].q.strb, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.strb - }; - snax_wide_req[i + wide_port_idx].q.user = '0; - snax_wide_req[i + wide_port_idx].q_valid = &{ - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+7].q_valid, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+6].q_valid, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+5].q_valid, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+4].q_valid, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+3].q_valid, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+2].q_valid, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+1].q_valid, - snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q_valid - }; - - // Response ports - { - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+7].p.data, - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+6].p.data, - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+5].p.data, - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+4].p.data, - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+3].p.data, - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+2].p.data, - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+1].p.data, - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8].p.data - } = snax_wide_rsp[j + wide_port_idx].p.data; - - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+7].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+6].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+5].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+4].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+3].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+2].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+1].p_valid = snax_wide_rsp[j + wide_port_idx].p_valid; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8].p_valid = snax_wide_rsp[j + 
wide_port_idx].p_valid; - - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+7].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+6].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+5].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+4].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+3].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+2].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+1].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; - snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8].q_ready = snax_wide_rsp[j + wide_port_idx].q_ready; - wide_port_idx += NumSnaxWideTcdmPortCurrentCore; + // calculate the offset of each core for the narrow and wide ports + narrow_port_idx[0] = 0; + wide_port_idx[0] = 0; + // connect each core's input narrow ports to wide or narrow TCDM ports + for(int i = 0; i < NrCores - 1; i++) begin + // connect the narrow ports to the narrow TCDM ports + if (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i] > 0) begin + NumSnaxNarrowTcdmPortCurrentCore[i] = (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i] + 1); + for (int j = 0; j < NumSnaxNarrowTcdmPortCurrentCore[i]; j++) begin + // Request ports + snax_narrow_req[j + narrow_port_idx[i]].q.addr = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.addr ; + snax_narrow_req[j + narrow_port_idx[i]].q.write = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.write; + snax_narrow_req[j + narrow_port_idx[i]].q.amo = reqrsp_pkg::AMONone; + snax_narrow_req[j + narrow_port_idx[i]].q.data = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.data; + snax_narrow_req[j + narrow_port_idx[i]].q.strb = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.strb; + snax_narrow_req[j + narrow_port_idx[i]].q.user = '0; + snax_narrow_req[j + narrow_port_idx[i]].q_valid = 
snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q_valid; + + // Response ports + snax_tcdm_rsp_o[SnaxNarrowStartIdx[i] + j].p.data = snax_narrow_rsp[j + narrow_port_idx[i]].p.data; + snax_tcdm_rsp_o[SnaxNarrowStartIdx[i] + j].p_valid = snax_narrow_rsp[j + narrow_port_idx[i]].p_valid; + snax_tcdm_rsp_o[SnaxNarrowStartIdx[i] + j].q_ready = snax_narrow_rsp[j + narrow_port_idx[i]].q_ready; + + // update the internal narrow port index at granularity of each core + narrow_port_idx[i+1] = narrow_port_idx[i] + NumSnaxNarrowTcdmPortCurrentCore[i]; + end end - end - if (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i] > 0) begin - int NumSnaxNarrowTcdmPortCurrentCore = (SnaxNarrowEndIdx[i] - SnaxNarrowStartIdx[i] + 1); - for (int j = 0; j < NumSnaxNarrowTcdmPortCurrentCore; j++) begin - // Request ports - snax_narrow_req[j + narrow_port_idx].q.addr = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.addr ; - snax_narrow_req[j + narrow_port_idx].q.write = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.write; - snax_narrow_req[j + narrow_port_idx].q.amo = reqrsp_pkg::AMONone; - snax_narrow_req[j + narrow_port_idx].q.data = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.data; - snax_narrow_req[j + narrow_port_idx].q.strb = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q.strb; - snax_narrow_req[j + narrow_port_idx].q.user = '0; - snax_narrow_req[j + narrow_port_idx].q_valid = snax_tcdm_req_i[SnaxNarrowStartIdx[i] + j].q_valid; - - // Response ports - snax_tcdm_rsp_o[SnaxNarrowStartIdx[i] + j].p.data = snax_narrow_rsp[j + narrow_port_idx].p.data; - snax_tcdm_rsp_o[SnaxNarrowStartIdx[i] + j].p_valid = snax_narrow_rsp[j + narrow_port_idx].p_valid; - snax_tcdm_rsp_o[SnaxNarrowStartIdx[i] + j].q_ready = snax_narrow_rsp[j + narrow_port_idx].q_ready; - narrow_port_idx += NumSnaxNarrowTcdmPortCurrentCore; + // connect the wide ports to the wide TCDM ports + if(SnaxWideEndIdx[i] - SnaxWideStartIdx[i] > 0) begin + NumSnaxWideTcdmPortCurrentCore[i] = (SnaxWideEndIdx[i] - SnaxWideStartIdx[i] + 1) / 8; + for(int 
j = 0; j < NumSnaxWideTcdmPortCurrentCore[i]; j++) begin + // Request ports + snax_wide_req[j + wide_port_idx[i]].q.addr = snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.addr ; + snax_wide_req[j + wide_port_idx[i]].q.write = snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.write; + snax_wide_req[j + wide_port_idx[i]].q.amo = reqrsp_pkg::AMONone; + snax_wide_req[j + wide_port_idx[i]].q.data = { + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+7].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+6].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+5].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+4].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+3].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+2].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+1].q.data, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.data + }; + snax_wide_req[j + wide_port_idx[i]].q.strb = { + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+7].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+6].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+5].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+4].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+3].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+2].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+1].q.strb, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q.strb + }; + snax_wide_req[j + wide_port_idx[i]].q.user = '0; + snax_wide_req[j + wide_port_idx[i]].q_valid = &{ + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+7].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+6].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+5].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+4].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+3].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+2].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8+1].q_valid, + snax_tcdm_req_i[SnaxWideStartIdx[i] + j*8].q_valid + }; + + // Response ports + { + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+7].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + 
j*8+6].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+5].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+4].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+3].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+2].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+1].p.data, + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8].p.data + } = snax_wide_rsp[j + wide_port_idx[i]].p.data; + + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+7].p_valid = snax_wide_rsp[j + wide_port_idx[i]].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+6].p_valid = snax_wide_rsp[j + wide_port_idx[i]].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+5].p_valid = snax_wide_rsp[j + wide_port_idx[i]].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+4].p_valid = snax_wide_rsp[j + wide_port_idx[i]].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+3].p_valid = snax_wide_rsp[j + wide_port_idx[i]].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+2].p_valid = snax_wide_rsp[j + wide_port_idx[i]].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+1].p_valid = snax_wide_rsp[j + wide_port_idx[i]].p_valid; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8].p_valid = snax_wide_rsp[j + wide_port_idx[i]].p_valid; + + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+7].q_ready = snax_wide_rsp[j + wide_port_idx[i]].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+6].q_ready = snax_wide_rsp[j + wide_port_idx[i]].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+5].q_ready = snax_wide_rsp[j + wide_port_idx[i]].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+4].q_ready = snax_wide_rsp[j + wide_port_idx[i]].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+3].q_ready = snax_wide_rsp[j + wide_port_idx[i]].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+2].q_ready = snax_wide_rsp[j + wide_port_idx[i]].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8+1].q_ready = snax_wide_rsp[j + wide_port_idx[i]].q_ready; + snax_tcdm_rsp_o[SnaxWideStartIdx[i] + j*8].q_ready = snax_wide_rsp[j + 
wide_port_idx[i]].q_ready; + + wide_port_idx[i+1] = wide_port_idx[i] + NumSnaxWideTcdmPortCurrentCore[i]; + end end - end - end + end end - end else begin + end else begin: gen_narrow_tcdm_only assign snax_narrow_req = snax_tcdm_req_i; assign snax_tcdm_rsp_o = snax_narrow_rsp; diff --git a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl index 4a7be3d3a..1d35ff5e0 100644 --- a/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl +++ b/hw/snitch_cluster/src/snitch_cluster_wrapper.sv.tpl @@ -79,13 +79,15 @@ for i in range(len(cfg['cores'])): wide_end_idx_list.append(snax_wide_tcdm_end_idx + idx_offset) idx_offset += snax_tcdm_ports - if (SnaxWideOnlyNum == len(cfg['cores'])): + if (SnaxWideOnlyNum == len(cfg['cores']) - 1): SnaxWideOnly = 1 if (SnaxNarrowAndWideNum != 0): SnaxNarrowAndWide = 1 - SnaxWidePorts += snax_wide_tcdm_end_idx - snax_wide_tcdm_start_idx + 1 - SnaxNarrowPorts += snax_narrow_tcdm_end_idx - snax_narrow_tcdm_start_idx + 1 + if( snax_wide_tcdm_end_idx != 0): + SnaxWidePorts += snax_wide_tcdm_end_idx - snax_wide_tcdm_start_idx + 1 + if( snax_narrow_tcdm_end_idx != 0): + SnaxNarrowPorts += snax_narrow_tcdm_end_idx - snax_narrow_tcdm_start_idx + 1 # Cycle through each accelerator setting per Snitch core for j in range(cfg['cores'][i]['snax_acc_set']['snax_num_acc']): From f2a321fca1f8ae5c8fe646649eebea9f595e8f93 Mon Sep 17 00:00:00 2001 From: rgantonio Date: Thu, 4 Apr 2024 16:01:20 +0200 Subject: [PATCH 9/9] cfg: Updated simd to wide only --- target/snitch_cluster/cfg/snax-test.hjson | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/target/snitch_cluster/cfg/snax-test.hjson b/target/snitch_cluster/cfg/snax-test.hjson index 34a84e1d9..b05380519 100644 --- a/target/snitch_cluster/cfg/snax-test.hjson +++ b/target/snitch_cluster/cfg/snax-test.hjson @@ -124,12 +124,12 @@ snax_module: "snax_streamer_simd_wrapper", snax_tcdm_ports: 40, snax_num_acc: 1, - 
snax_connect_tcdm_wide_only: false, - snax_connect_narrow_wide_mix: true, - snax_narrow_tcdm_start_idx: 32, - snax_narrow_tcdm_end_idx: 39, + snax_connect_tcdm_wide_only: true, + snax_connect_narrow_wide_mix: false, + snax_narrow_tcdm_start_idx: 0, + snax_narrow_tcdm_end_idx: 0, snax_wide_tcdm_start_idx: 0, - snax_wide_tcdm_end_idx: 31, + snax_wide_tcdm_end_idx: 39, } snax_acc_wide: false, num_int_outstanding_loads: 1,