diff --git a/chip_top/AnyCore_Piton.sv b/chip_top/AnyCore_Piton.sv index 1ef2d07..28f7cdd 100644 --- a/chip_top/AnyCore_Piton.sv +++ b/chip_top/AnyCore_Piton.sv @@ -103,6 +103,7 @@ module AnyCore_Piton( wire [`ICACHE_BLOCK_ADDR_BITS-1:0] ic2memReqAddr; // memory read address wire ic2memReqValid; // memory read enable +wire [`ICACHE_NUM_WAYS_LOG-1:0] ic2memReqWay; // memory way wire [`ICACHE_TAG_BITS-1:0] mem2icTag; // tag of the incoming data wire [`ICACHE_INDEX_BITS-1:0] mem2icIndex; // index of the incoming data wire [`ICACHE_BITS_IN_LINE-1:0] mem2icData; // requested data @@ -110,11 +111,12 @@ wire mem2icRespValid; // requested data is ready wire mem2icInv; // icache invalidation wire [`ICACHE_INDEX_BITS-1:0] mem2icInvInd; // icache invalidation index -wire [0:0] mem2icInvWay; // icache invalidation way (unused) +wire [`ICACHE_NUM_WAYS_LOG-1:0] mem2icInvWay; // icache invalidation way (unused) // cache-to-memory interface for Loads wire [`DCACHE_BLOCK_ADDR_BITS-1:0] dc2memLdAddr; // memory read address wire dc2memLdValid; // memory read enable +wire [1:0] dc2memReqWay; // memory way // memory-to-cache interface for Loads wire [`DCACHE_TAG_BITS-1:0] mem2dcLdTag; // tag of the incoming datadetermine @@ -130,7 +132,7 @@ wire dc2memStValid; wire mem2dcInv; // dcache invalidation wire [`DCACHE_INDEX_BITS-1:0] mem2dcInvInd; // dcache invalidation index -wire [0:0] mem2dcInvWay; // dcache invalidation way (unused) +wire [1:0] mem2dcInvWay; // dcache invalidation way (unused) wire mem2dcStComplete; wire mem2dcStStall; @@ -448,6 +450,7 @@ Core_OOO coreTop( `ifdef INST_CACHE .ic2memReqAddr_o (ic2memReqAddr ), // memory read address .ic2memReqValid_o (ic2memReqValid ), // memory read enable + .ic2memReqWay_o (ic2memReqWay), .mem2icTag_i (mem2icTag ), // tag of the incoming data .mem2icIndex_i (mem2icIndex ), // index of the incoming data .mem2icData_i (mem2icData ), // requested data @@ -472,6 +475,7 @@ Core_OOO coreTop( .dc2memLdAddr_o (dc2memLdAddr ), // memory read 
address .dc2memLdValid_o (dc2memLdValid ), // memory read enable + .dc2memReqWay_o (dc2memReqWay ), .mem2dcLdTag_i (mem2dcLdTag ), // tag of the incoming datadetermine .mem2dcLdIndex_i (mem2dcLdIndex ), // index of the incoming data @@ -586,6 +590,7 @@ Core_OOO coreTop( //`endif + //DO WE EVEN NEED TO ADD STUFF HERE? // not supported at the moment assign transducer_l15_amo_op = `L15_AMO_OP_NONE; anycore_tri_transducer tri_transducer( @@ -597,6 +602,9 @@ Core_OOO coreTop( .ic2mem_reqaddr_i (ic2memReqAddr), .ic2mem_reqvalid_i (ic2memReqValid), + .ic2memReqWay_o (ic2memReqWay), + .dc2memReqWay_o (dc2memReqWay), + .dc2mem_ldaddr_i (dc2memLdAddr), .dc2mem_ldvalid_i (dc2memLdValid), diff --git a/chip_top/anycore_tri_transducer.v b/chip_top/anycore_tri_transducer.v index b1fc767..7a9a94a 100644 --- a/chip_top/anycore_tri_transducer.v +++ b/chip_top/anycore_tri_transducer.v @@ -10,6 +10,8 @@ module anycore_tri_transducer( input [`ICACHE_BLOCK_ADDR_BITS-1:0] ic2mem_reqaddr_i, input ic2mem_reqvalid_i, + input [`ICACHE_NUM_WAYS_LOG-1:0] ic2memReqWay_o, // request ways are produced by the core's caches and only read here, so both are inputs (names kept for the existing named connections) + input [1:0] dc2memReqWay_o, input [`DCACHE_BLOCK_ADDR_BITS-1:0] dc2mem_ldaddr_i, @@ -58,6 +60,7 @@ module anycore_tri_transducer( input wire [1:0] l15_transducer_cross_invalidate_way_i, input wire l15_transducer_inval_dcache_inval_i, input wire l15_transducer_inval_icache_inval_i, + //said something about this wire input wire [1:0] l15_transducer_inval_way_i, input wire l15_transducer_blockinitstore_i, @@ -76,11 +79,11 @@ module anycore_tri_transducer( output mem2dc_invvalid_o, output [`DCACHE_INDEX_BITS-1:0] mem2dc_invindex_o, - output [0:0] mem2dc_invway_o, + output [1:0] mem2dc_invway_o, output mem2ic_invvalid_o, output [`ICACHE_INDEX_BITS-1:0] mem2ic_invindex_o, - output [0:0] mem2ic_invway_o, + output [`ICACHE_NUM_WAYS_LOG-1:0] mem2ic_invway_o, input dc2mem_stvalid_i, output reg mem2dc_stcomplete_o, @@ -98,14 +101,17 @@ reg header_ack_seen_reg; // Get full address that's 64 bits long since it's otherwise to icache //
block alignment wire [63:0] anycore_imiss_full_addr = ic2mem_reqaddr_i << (64-`ICACHE_BLOCK_ADDR_BITS); -wire [1:0] anycore_imiss_way = ic2mem_reqaddr_i[`ICACHE_INDEX_BITS-1:`ICACHE_INDEX_BITS-2-1]; +//CHANGES +//wire [1:0] anycore_imiss_way = ic2mem_reqaddr_i[`ICACHE_INDEX_BITS-1:`ICACHE_INDEX_BITS-2-1]; +wire [`ICACHE_NUM_WAYS_LOG-1:0] anycore_imiss_way = ic2memReqWay_o; + // Sign extend to 64 bits //wire [63:0] anycore_store_full_addr = {{((64-`DCACHE_ST_ADDR_BITS)-3){dc2mem_staddr_i[`DCACHE_ST_ADDR_BITS-1]}}, (dc2mem_staddr_i << 3)}; wire [63:0] anycore_store_full_addr = {{((64-`DCACHE_ST_ADDR_BITS)-3){dc2mem_staddr_i[`DCACHE_ST_ADDR_BITS-1]}}, (dc2mem_staddr_i)}; -wire [1:0] anycore_store_way = dc2mem_staddr_i[`DCACHE_INDEX_BITS-1:`DCACHE_INDEX_BITS-2-1]; +wire [1:0] anycore_store_way = dc2memReqWay_o; // dc2mem_staddr_i[`DCACHE_INDEX_BITS-1:`DCACHE_INDEX_BITS-2-1]; // Sign extend to 64 bits wire [63:0] anycore_load_full_addr = {{((64-`DCACHE_BLOCK_ADDR_BITS)-4){dc2mem_ldaddr_i[`DCACHE_BLOCK_ADDR_BITS-1]}}, (dc2mem_ldaddr_i << 4)}; -wire [1:0] anycore_load_way = dc2mem_ldaddr_i[`DCACHE_INDEX_BITS-1:`DCACHE_INDEX_BITS-2-1]; +wire [1:0] anycore_load_way = dc2memReqWay_o; // dc2mem_ldaddr_i[`DCACHE_INDEX_BITS-1:`DCACHE_INDEX_BITS-2-1]; wire [63:0] anycore_dc2mem_stdata_flipped = {dc2mem_stdata_i[7:0], dc2mem_stdata_i[15:8], dc2mem_stdata_i[23:16], dc2mem_stdata_i[31:24], dc2mem_stdata_i[39:32], dc2mem_stdata_i[47:40], dc2mem_stdata_i[55:48], dc2mem_stdata_i[63:56]}; //wire [63:0] anycore_dc2mem_stdata_flipped = dc2mem_stdata_i; @@ -411,9 +417,13 @@ end assign mem2dc_invvalid_o = signal_dcache_inval & ~dinvalrst_reg; assign mem2ic_invvalid_o = signal_icache_inval & ~iinvalrst_reg; -assign mem2dc_invway_o = l15_transducer_inval_way_i; +//CHANGES +assign mem2dc_invway_o = dc2memReqWay_o; +//assign mem2dc_invway_o = ic2memReqWay_o; assign mem2dc_invindex_o = l15_transducer_inval_address_15_4_i[`DCACHE_INDEX_BITS+4-1:4]; -assign mem2ic_invway_o = 
l15_transducer_inval_way_i; +//assign mem2ic_invway_o = 1; //l15_transducer_inval_way_i; +assign mem2ic_invway_o = ic2memReqWay_o; +//assign mem2ic_invway_o = mem2icInvWay_i; assign mem2ic_invindex_o = l15_transducer_inval_address_15_4_i[`DCACHE_INDEX_BITS+4-1:4]; always @ * begin diff --git a/configs/CommonConfig.h b/configs/CommonConfig.h index 85e410b..dd8b94c 100644 --- a/configs/CommonConfig.h +++ b/configs/CommonConfig.h @@ -255,14 +255,16 @@ `define ICACHE_BITS_IN_LINE (`ICACHE_INSTS_IN_LINE*`SIZE_INSTRUCTION) //In bits `define ICACHE_BYTES_IN_LINE (`ICACHE_BITS_IN_LINE/8) `define ICACHE_BYTES_IN_LINE_LOG (`ICACHE_INSTS_IN_LINE_LOG + `ICACHE_INST_BYTE_OFFSET_LOG) //log2(ICACHE_BYTES_IN_LINE) - `define ICACHE_NUM_LINES 64 //128 - `define ICACHE_NUM_LINES_LOG 6 //7 + `define ICACHE_NUM_LINES 16 //128 + `define ICACHE_NUM_LINES_LOG 4 //7 `define ICACHE_OFFSET_BITS `ICACHE_INSTS_IN_LINE_LOG `define ICACHE_INDEX_BITS `ICACHE_NUM_LINES_LOG `define ICACHE_TAG_BITS (`SIZE_PC - `ICACHE_INDEX_BITS - `ICACHE_OFFSET_BITS - `ICACHE_INST_BYTE_OFFSET_LOG) `define ICACHE_BLOCK_ADDR_BITS (`SIZE_PC - `ICACHE_OFFSET_BITS - `ICACHE_INST_BYTE_OFFSET_LOG) // Cache block address `define ICACHE_PC_PKT_BITS 8 `define ICACHE_INST_PKT_BITS 8 + `define ICACHE_NUM_WAYS 4 + `define ICACHE_NUM_WAYS_LOG 2 `endif `ifdef DATA_CACHE diff --git a/core_top/Core_OOO.sv b/core_top/Core_OOO.sv index 923a406..7fad557 100644 --- a/core_top/Core_OOO.sv +++ b/core_top/Core_OOO.sv @@ -54,6 +54,8 @@ module Core_OOO( `ifdef INST_CACHE output [`ICACHE_BLOCK_ADDR_BITS-1:0]ic2memReqAddr_o, // memory read address output ic2memReqValid_o, // memory read enable + output [`ICACHE_NUM_WAYS_LOG-1:0] ic2memReqWay_o, + input [`ICACHE_TAG_BITS-1:0] mem2icTag_i, // tag of the incoming data input [`ICACHE_INDEX_BITS-1:0] mem2icIndex_i, // index of the incoming data input [`ICACHE_BITS_IN_LINE-1:0] mem2icData_i, // requested data @@ -61,7 +63,7 @@ module Core_OOO( input mem2icInv_i, // icache invalidation input 
[`ICACHE_INDEX_BITS-1:0] mem2icInvInd_i, // icache invalidation index - input [0:0] mem2icInvWay_i, // icache invalidation way (unused) + input [`ICACHE_NUM_WAYS_LOG-1:0] mem2icInvWay_i, // icache invalidation way (unused) //input instCacheBypass_i, input icScratchModeEn_i, // Should ideally be disabled by default @@ -78,6 +80,7 @@ module Core_OOO( // cache-to-memory interface for Loads output [`DCACHE_BLOCK_ADDR_BITS-1:0]dc2memLdAddr_o, // memory read address output reg dc2memLdValid_o, // memory read enable + output [1:0] dc2memReqWay_o, // memory-to-cache interface for Loads input [`DCACHE_TAG_BITS-1:0] mem2dcLdTag_i, // tag of the incoming datadetermine @@ -93,7 +96,7 @@ module Core_OOO( input mem2dcInv_i, // dcache invalidation input [`DCACHE_INDEX_BITS-1:0] mem2dcInvInd_i, // dcache invalidation index - input [0:0] mem2dcInvWay_i, // dcache invalidation way (unused) + input [1:0] mem2dcInvWay_i, // dcache invalidation way (unused) // memory-to-cache interface for stores input mem2dcStComplete_i, @@ -501,6 +504,7 @@ FetchStage1 fs1( `ifdef INST_CACHE .ic2memReqAddr_o (ic2memReqAddr_o ), // memory read address .ic2memReqValid_o (ic2memReqValid_o ), // memory read enable + .ic2memReqWay_o (ic2memReqWay_o ), .mem2icTag_i (mem2icTag_i ), // tag of the incoming data .mem2icIndex_i (mem2icIndex_i ), // index of the incoming data .mem2icData_i (mem2icData_i ), // requested data @@ -1608,6 +1612,7 @@ LSU lsu ( .dc2memLdAddr_o (dc2memLdAddr_o ), // memory read address .dc2memLdValid_o (dc2memLdValid_o ), // memory read enable + .dc2memReqWay_o (dc2memReqWay_o ), .mem2dcLdTag_i (mem2dcLdTag_i ), // tag of the incoming datadetermine .mem2dcLdIndex_i (mem2dcLdIndex_i ), // index of the incoming data @@ -1621,7 +1626,7 @@ LSU lsu ( .mem2dcInv_i, // dcache invalidation .mem2dcInvInd_i, // dcache invalidation index - .mem2dcInvWay_i, // dcache invalidation way (unusedndex + .mem2dcInvWay_i (mem2dcInvWay_i ), .mem2dcStComplete_i (mem2dcStComplete_i ), .mem2dcStStall_i 
(mem2dcStStall_i ), diff --git a/dcache/DCache_controller.sv b/dcache/DCache_controller.sv index 2bd579f..6af6375 100644 --- a/dcache/DCache_controller.sv +++ b/dcache/DCache_controller.sv @@ -42,6 +42,10 @@ module DCache_controller( input [`SIZE_DATA-1:0] stData_i, //input [2**`DCACHE_WORD_BYTE_OFFSET_LOG-1:0]stByteEn_i, output stHit_o, + output stHit_o1, + output stHit_o2, + output stHit_o3, + output stHit_total, `ifdef DATA_CACHE // cache-to-memory interface for Loads @@ -54,6 +58,8 @@ module DCache_controller( input [`DCACHE_BITS_IN_LINE-1:0] mem2dcLdData_i, // requested data input mem2dcLdValid_i, // indicates the requested data is ready + output logic [1:0] dc2memReqWay_o, + // cache-to-memory interface for stores output [`DCACHE_ST_ADDR_BITS-1:0] dc2memStAddr_o, // memory read address output [`SIZE_DATA-1:0] dc2memStData_o, // memory read address @@ -62,7 +68,7 @@ module DCache_controller( input mem2dcInv_i, // dcache invalidation input [`DCACHE_INDEX_BITS-1:0] mem2dcInvInd_i, // dcache invalidation index - input [0:0] mem2dcInvWay_i, // dcache invalidation way (unused) + input [1:0] mem2dcInvWay_i, // dcache invalidation way (also selects the way written on a fill) output stbEmpty_o, // Signals that there are no pending stores to be written to next level @@ -109,6 +115,9 @@ module DCache_controller( logic dcScratchWrEn_d1; logic mem2dcStComplete_d1; logic [`DCACHE_BITS_IN_LINE-1:0] mem2dcLdData_d1; // requested data + logic [1:0] RoundRobin [`DCACHE_NUM_LINES-1:0]; // per-set round-robin pointer; sized by the D-cache set count because it is indexed by ld_index, 2 bits wide to match dc2memReqWay_o + int misses = 0; + int hits = 0; always_ff @(posedge clk or posedge reset) begin @@ -140,7 +149,12 @@ module DCache_controller( logic [`DCACHE_INDEX_BITS-1:0] ld_index_reg; logic [`DCACHE_TAG_BITS-1:0] ld_tag_reg; logic ldEn_reg; + //duplicated this data logic [`SIZE_DATA-1:0] ldData; + logic [`SIZE_DATA-1:0] ldData1; + logic [`SIZE_DATA-1:0] ldData2; + logic [`SIZE_DATA-1:0] ldData3; + logic [`SIZE_DATA-1:0] ldData_from_hit; // Maps to whichever ldData corresponds to a hit // store pc segments
///////////////////////////////////////////// logic [`DCACHE_OFFSET_BITS-1:0] st_offset; @@ -188,11 +202,35 @@ module DCache_controller( logic [`DCACHE_TAG_BITS-1:0] ld_cache_tag; logic [`DCACHE_BITS_IN_LINE-1:0]ld_cache_data; logic ld_cache_valid; + + logic [`DCACHE_TAG_BITS-1:0] ld_cache_tag1; + logic [`DCACHE_BITS_IN_LINE-1:0]ld_cache_data1; + logic ld_cache_valid1; + + logic [`DCACHE_TAG_BITS-1:0] ld_cache_tag2; + logic [`DCACHE_BITS_IN_LINE-1:0]ld_cache_data2; + logic ld_cache_valid2; + + logic [`DCACHE_TAG_BITS-1:0] ld_cache_tag3; + logic [`DCACHE_BITS_IN_LINE-1:0]ld_cache_data3; + logic ld_cache_valid3; // hit detection logic. hits are detected the cycle after ldEn_i goes high. // hit can stay high for multiple cycles if no new request comes (e.g. fetch // stalls) + + // CS 254: + // Erwan: Do we need to duplicate this data as well? Did that for now but can change it back. + // Rajan: Makes sense to me, since we want to separately check if each way has a hit + // when doing loads logic ldHit; + logic ldHit1 = 0; + logic ldHit2 = 0; + logic ldHit3 = 0; + logic ldHit_total; + + assign ldHit_total = ldHit | ldHit1 | ldHit2 | ldHit3; + assign ldMiss = ~ldHit_total; // Muxing might be needed to account for the differing load sizes @@ -202,39 +240,64 @@ module DCache_controller( //ldData_o = 32'hdeadbeef; // Consider it as a miss if a partial hit in STB. Being conservative // whenever a size mismatch is observed.
- ldDataValid_o = ldHit & ~stbPartialHit; - ldHit_o = ldHit & ~stbPartialHit; + ldDataValid_o = ldHit_total & ~stbPartialHit; + ldHit_o = ldHit_total & ~stbPartialHit; case (ldSize_i) `LDST_BYTE: begin - ldData_o = (ldData >> {ldAddr_i[2:0], 3'h0}) & 64'h0000_0000_0000_00FF; + ldData_o = (ldData_from_hit >> {ldAddr_i[2:0], 3'h0}) & 64'h0000_0000_0000_00FF; if(ldSign_i) ldData_o = {{56{ldData_o[7]}},ldData_o[7:0]}; end `LDST_HALF_WORD: begin - ldData_o = (ldData >> {ldAddr_i[2:1], 4'h0}) & 64'h0000_0000_0000_FFFF; + ldData_o = (ldData_from_hit >> {ldAddr_i[2:1], 4'h0}) & 64'h0000_0000_0000_FFFF; if(ldSign_i) ldData_o = {{48{ldData_o[15]}},ldData_o[15:0]}; end `LDST_WORD: begin - ldData_o = (ldData >> {ldAddr_i[2], 5'h0}) & 64'h0000_0000_FFFF_FFFF; + ldData_o = (ldData_from_hit >> {ldAddr_i[2], 5'h0}) & 64'h0000_0000_FFFF_FFFF; if(ldSign_i) ldData_o = {{32{ldData_o[31]}},ldData_o[31:0]}; end `LDST_DOUBLE_WORD: - ldData_o = ldData; + ldData_o = ldData_from_hit; endcase // If trying to access heap region // if (ldAddr_i[31]) // ldData_o = 32'hdeadbeef; end + + always_ff @(posedge clk) // round-robin pointer update plus hit/miss counters printed with $display + begin + if (reset) + begin + int i; + for (i = 0; i < `DCACHE_NUM_LINES; i++) + begin + RoundRobin[i] <= '0; + end + end + else if (ldMiss | stMiss_o) + begin + int i; + misses <= misses + 1; + $display("MISSES (dcache): %d", misses); + RoundRobin[ld_index] <= RoundRobin[ld_index] + 1'b1; + end + else if (ldHit_total) + begin + int i; + hits <= hits + 1; + $display("HITS (dcache): %d", hits); + end + end /////////////////////////////////////////////// @@ -265,9 +328,9 @@ module DCache_controller( logic miss_pulse; logic missUnderMiss; - assign miss = ~ldHit; + assign miss = ~ldHit_total; - assign ldMiss_o = miss & ldEn_i; + assign ldMiss_o = miss & ldEn_i; always_ff @(posedge clk or posedge reset) begin @@ -426,13 +489,27 @@ module DCache_controller( end //////////////////////////////////////////////////////////// - + //duplicated the logic here.
logic [`DCACHE_TAG_BITS-1:0] st_cache_tag; logic [`DCACHE_BITS_IN_LINE-1:0] st_cache_data; logic st_cache_valid; + logic [`DCACHE_TAG_BITS-1:0] st_cache_tag1; + logic [`DCACHE_BITS_IN_LINE-1:0] st_cache_data1; + logic st_cache_valid1; + logic [`DCACHE_TAG_BITS-1:0] st_cache_tag2; + logic [`DCACHE_BITS_IN_LINE-1:0] st_cache_data2; + logic st_cache_valid2; + logic [`DCACHE_TAG_BITS-1:0] st_cache_tag3; + logic [`DCACHE_BITS_IN_LINE-1:0] st_cache_data3; + logic st_cache_valid3; + logic [`DCACHE_BITS_IN_LINE-1:0] st_cache_data_from_hit; logic [`DCACHE_BITS_IN_LINE-1:0] stbUpdateData; logic stHit; + logic stHit1; + logic stHit2; + logic stHit3; + // stHit_total is already declared as an output port in the module header, so it is not redeclared here; the continuous assign further down drives it // the unregistered index is for reading the tag/data array assign st_offset = stAddr_i[`DCACHE_OFFSET_BITS+`DCACHE_WORD_BYTE_OFFSET_LOG-1 : `DCACHE_WORD_BYTE_OFFSET_LOG]; @@ -512,8 +589,11 @@ module DCache_controller( // makes sure of a future hit. assign stHit_o = stHit; + assign stHit_o1 = stHit1; + assign stHit_o2 = stHit2; + assign stHit_o3 = stHit3; - assign stMiss_o = ~stHit & mem2dcStComplete_d1; + assign stMiss_o = ~stHit_total & mem2dcStComplete_d1; assign dc2memStAddr_o = st_addr_reg; assign dc2memStData_o = piton_stData_reg; @@ -667,26 +747,67 @@ module DCache_controller( end //////////////////////////////////////////////////////////// - + // (CS 254) + // Erwan: Do I need to duplicate this data? maybe store the ld data depending on the way of a hit or something? + // Rajan: Whichever ldData gets selected to be the cache's output should depend on the input way assign ldData = stbHit ? stbData[latestMatch] : ld_cache_data[ld_offset*`SIZE_DATA +: `SIZE_DATA]; + assign ldData1 = stbHit ? stbData[latestMatch] : ld_cache_data1[ld_offset*`SIZE_DATA +: `SIZE_DATA]; + assign ldData2 = stbHit ? stbData[latestMatch] : ld_cache_data2[ld_offset*`SIZE_DATA +: `SIZE_DATA]; + assign ldData3 = stbHit ?
stbData[latestMatch] : ld_cache_data3[ld_offset*`SIZE_DATA +: `SIZE_DATA]; /* Cache data and tag arrays */ reg [`DCACHE_BITS_IN_LINE-1:0] data_array [`DCACHE_NUM_LINES-1:0]; reg [`DCACHE_TAG_BITS-1:0] tag_array [`DCACHE_NUM_LINES-1:0]; reg [`DCACHE_NUM_LINES-1:0] valid_array; + + reg [`DCACHE_BITS_IN_LINE-1:0] data_array1 [`DCACHE_NUM_LINES-1:0]; + reg [`DCACHE_TAG_BITS-1:0] tag_array1 [`DCACHE_NUM_LINES-1:0]; + reg [`DCACHE_NUM_LINES-1:0] valid_array1; + + reg [`DCACHE_BITS_IN_LINE-1:0] data_array2 [`DCACHE_NUM_LINES-1:0]; + reg [`DCACHE_TAG_BITS-1:0] tag_array2 [`DCACHE_NUM_LINES-1:0]; + reg [`DCACHE_NUM_LINES-1:0] valid_array2; + + reg [`DCACHE_BITS_IN_LINE-1:0] data_array3 [`DCACHE_NUM_LINES-1:0]; + reg [`DCACHE_TAG_BITS-1:0] tag_array3 [`DCACHE_NUM_LINES-1:0]; + reg [`DCACHE_NUM_LINES-1:0] valid_array3; always_comb begin + dc2memReqWay_o = RoundRobin[ld_index]; + + // (CS 254) Although it seems like we would want to switch based on + // the invWay coming in, we actually need to compute all 4 sets of + // data below. After all, if there isn't a hit, the data for that + // way won't get updated anyway. ld_cache_data = data_array[ld_index]; ld_cache_tag = tag_array[ld_index]; ld_cache_valid = valid_array[ld_index]; + + ld_cache_data1 = data_array1[ld_index]; + ld_cache_tag1 = tag_array1[ld_index]; + ld_cache_valid1 = valid_array1[ld_index]; + + ld_cache_data2 = data_array2[ld_index]; + ld_cache_tag2 = tag_array2[ld_index]; + ld_cache_valid2 = valid_array2[ld_index]; + + ld_cache_data3 = data_array3[ld_index]; + ld_cache_tag3 = tag_array3[ld_index]; + ld_cache_valid3 = valid_array3[ld_index]; end always_comb begin // If hit in store buffer, ignore the cache array hit as STB has latest value. // If hit in store buffer, it is a miss if the sizes are not compatible. + + // CS 254 + // Erwan: Scratch mode so won't modify the ldHit stuff in here for now, is this necessary? 
+ // it seems that apart from being assigned to the miss and being declared this is the only spot + // where ldHit is being used so does it need to be duplicated at all? + // Rajan: We don't need to worry about associativity during scratch mode, so we can leave it. ldHit = 1'b0; // NOTE: SCRATCH MODE if(dcScratchModeEn_d1) @@ -694,28 +815,92 @@ module DCache_controller( else begin if(stbHit) + begin + // (CS 254) When there's a store buffer hit, the cache isn't used, so we don't + // need to duplicate this ldHit = (ldSize_i <= stbStSize[latestMatch]) & ldEn_i; // Must indicate ldHit only when there's a valid ldEn + end else - ldHit = (ld_cache_tag == ld_tag) & ld_cache_valid & ldEn_i; + begin + // (CS 254) Duplicated ldHit logic here. + ldHit = (ld_cache_tag == ld_tag) & ld_cache_valid & ldEn_i; + ldHit1 = (ld_cache_tag1 == ld_tag) & ld_cache_valid1 & ldEn_i; + ldHit2 = (ld_cache_tag2 == ld_tag) & ld_cache_valid2 & ldEn_i; + ldHit3 = (ld_cache_tag3 == ld_tag) & ld_cache_valid3 & ldEn_i; + end end end + always_comb + begin + // This will be the data that gets output from the cache. Each ldData corresponds to a way + // and each hit is true when that way has a hit (based on the tag) + ldData_from_hit = ldHit ? ldData : ldHit1 ? ldData1 : ldHit2 ? ldData2 : ldHit3 ? ldData3 : ldData; + end always_ff @(posedge clk) begin // No need to update to a line that is being replaced // by a fill. + + // CS 254: + // Erwan: Duplicated logic here based on stHit + // should I duplicate more logic here apart from stHit and data_array? + // Rajan: No need. 
if(stHit & ~((stbHeadIndex == fillIndex) & fillValid)) begin data_array[stbHeadIndex] <= stbUpdateData; end + else if(stHit1 & ~((stbHeadIndex == fillIndex) & fillValid)) + begin + data_array1[stbHeadIndex] <= stbUpdateData; + end + else if(stHit2 & ~((stbHeadIndex == fillIndex) & fillValid)) + begin + data_array2[stbHeadIndex] <= stbUpdateData; + end + else if(stHit3 & ~((stbHeadIndex == fillIndex) & fillValid)) + begin + data_array3[stbHeadIndex] <= stbUpdateData; + end + + // CS 254: Brought over from icache controller + if (reset) + begin + int i; + for(i = 0; i < `DCACHE_NUM_LINES; i++) + begin + data_array[i] <= 0; + data_array1[i] <= 0; + data_array2[i] <= 0; + data_array3[i] <= 0; + end + end // Fill to the same line gets priority over store update // as the block being stored to is being overwritten anyway. - if(fillValid) + else if(fillValid) begin - data_array[fillIndex] <= fillData; - tag_array[fillIndex] <= fillTag; + if (mem2dcInvWay_i == 2'b00) + begin + data_array[fillIndex] <= fillData; + tag_array[fillIndex] <= fillTag; + end + else if (mem2dcInvWay_i == 2'b01) + begin + data_array1[fillIndex] <= fillData; + tag_array1[fillIndex] <= fillTag; + end + else if (mem2dcInvWay_i == 2'b10) + begin + data_array2[fillIndex] <= fillData; + tag_array2[fillIndex] <= fillTag; + end + else if (mem2dcInvWay_i == 2'b11) + begin + data_array3[fillIndex] <= fillData; + tag_array3[fillIndex] <= fillTag; + end end // Load scratch pad from outside else if(dcScratchWrEn_d1 & dcScratchModeEn_d1) @@ -727,27 +912,67 @@ module DCache_controller( // Reading the bytes through the SCRATCH interface assign dcScratchRdData_o = data_array[dcScratchWrIndex_d1][(dcScratchWrByte_d1*8) +: 8]; - + //basically copied what we had from the icache controller here, I'm assuming its not much different always_ff @(posedge clk or posedge reset) begin int i; if(reset) begin for(i = 0; i < `DCACHE_NUM_LINES;i++) + begin valid_array[i] <= 1'b0; + valid_array1[i] <= 1'b0; + valid_array2[i] <= 
1'b0; + valid_array3[i] <= 1'b0; + end end else if(dcFlush_i) begin for(i = 0; i < `DCACHE_NUM_LINES;i++) - valid_array[i] <= 1'b0; + begin + valid_array[i] <= 1'b0; + valid_array1[i] <= 1'b0; + valid_array2[i] <= 1'b0; + valid_array3[i] <= 1'b0; + end end else if(mem2dcInv_i) begin - valid_array[mem2dcInvInd_i] <= 1'b0; + if (mem2dcInvWay_i == 2'b00) + begin + valid_array[mem2dcInvInd_i] <= 1'b0; + end + else if (mem2dcInvWay_i == 2'b01) + begin + valid_array1[mem2dcInvInd_i] <= 1'b0; + end + else if (mem2dcInvWay_i == 2'b10) + begin + valid_array2[mem2dcInvInd_i] <= 1'b0; + end + else if (mem2dcInvWay_i == 2'b11) + begin + valid_array3[mem2dcInvInd_i] <= 1'b0; + end end else if(fillValid) begin - valid_array[fillIndex] <= 1'b1; + if (mem2dcInvWay_i == 2'b00) + begin + valid_array[fillIndex] <= 1'b1; + end + else if (mem2dcInvWay_i == 2'b01) + begin + valid_array1[fillIndex] <= 1'b1; + end + else if (mem2dcInvWay_i == 2'b10) + begin + valid_array2[fillIndex] <= 1'b1; + end + else if (mem2dcInvWay_i == 2'b11) + begin + valid_array3[fillIndex] <= 1'b1; + end end end @@ -771,16 +996,34 @@ module DCache_controller( st_cache_data = data_array[stbHeadIndex]; st_cache_tag = tag_array[stbHeadIndex]; st_cache_valid = valid_array[stbHeadIndex]; + + st_cache_data1 = data_array1[stbHeadIndex]; + st_cache_tag1 = tag_array1[stbHeadIndex]; + st_cache_valid1 = valid_array1[stbHeadIndex]; + + st_cache_data2 = data_array2[stbHeadIndex]; + st_cache_tag2 = tag_array2[stbHeadIndex]; + st_cache_valid2 = valid_array2[stbHeadIndex]; + + st_cache_data3 = data_array3[stbHeadIndex]; + st_cache_tag3 = tag_array3[stbHeadIndex]; + st_cache_valid3 = valid_array3[stbHeadIndex]; end // NOTE: SCRATCH MODE // If in scratch mode, store hits whenever there is a store to be done. assign stHit = dcScratchModeEn_d1 ? stEn_i : (((st_cache_tag == stbHeadTag) & st_cache_valid) & mem2dcStComplete_d1); + assign stHit1 = dcScratchModeEn_d1 ? 
stEn_i : (((st_cache_tag1 == stbHeadTag) & st_cache_valid1) & mem2dcStComplete_d1); + assign stHit2 = dcScratchModeEn_d1 ? stEn_i : (((st_cache_tag2 == stbHeadTag) & st_cache_valid2) & mem2dcStComplete_d1); + assign stHit3 = dcScratchModeEn_d1 ? stEn_i : (((st_cache_tag3 == stbHeadTag) & st_cache_valid3) & mem2dcStComplete_d1); + assign stHit_total = stHit | stHit1 | stHit2 | stHit3; + assign st_cache_data_from_hit = stHit ? st_cache_data : stHit1 ? st_cache_data1 : stHit2 ? st_cache_data2 : stHit3 ? st_cache_data3 : st_cache_data; always_comb begin + //Most likely also need to add some logic here? int i,j; - stbUpdateData = st_cache_data; + stbUpdateData = st_cache_data_from_hit; // Merge received data with the latest store data byte by byte for(i=0;i<`DCACHE_WORDS_IN_LINE;i++) diff --git a/fetch/FetchStage1.sv b/fetch/FetchStage1.sv index 3d23f21..8a427f2 100644 --- a/fetch/FetchStage1.sv +++ b/fetch/FetchStage1.sv @@ -38,6 +38,8 @@ module FetchStage1( `ifdef INST_CACHE output [`ICACHE_BLOCK_ADDR_BITS-1:0]ic2memReqAddr_o, // memory read address output ic2memReqValid_o, // memory read enable + output [`ICACHE_NUM_WAYS_LOG-1:0] ic2memReqWay_o, + input [`ICACHE_TAG_BITS-1:0] mem2icTag_i, // tag of the incoming data input [`ICACHE_INDEX_BITS-1:0] mem2icIndex_i, // index of the incoming data input [`ICACHE_BITS_IN_LINE-1:0] mem2icData_i, // requested data @@ -45,7 +47,7 @@ module FetchStage1( input mem2icInv_i, // icache invalidation input [`ICACHE_INDEX_BITS-1:0] mem2icInvInd_i, // icache invalidation index - input [0:0] mem2icInvWay_i, // icache invalidation way (unused) + input [`ICACHE_NUM_WAYS_LOG-1:0] mem2icInvWay_i, // icache invalidation way (unused) input icScratchModeEn_i, // Should ideally be disabled by default input [`ICACHE_INDEX_BITS+`ICACHE_BYTES_IN_LINE_LOG-1:0] icScratchWrAddr_i, @@ -330,6 +332,7 @@ L1ICache l1icache( `ifdef INST_CACHE .ic2memReqAddr_o (ic2memReqAddr_o ), // memory read address .ic2memReqValid_o (ic2memReqValid_o ), // memory read 
enable + .ic2memReqWay_o (ic2memReqWay_o ), .mem2icTag_i (mem2icTag_i ), // tag of the incoming data .mem2icIndex_i (mem2icIndex_i ), // index of the incoming data .mem2icData_i (mem2icData_i ), // requested data diff --git a/fetch/L1ICache.sv b/fetch/L1ICache.sv index d8056f3..92b538f 100644 --- a/fetch/L1ICache.sv +++ b/fetch/L1ICache.sv @@ -47,6 +47,8 @@ module L1ICache ( `ifdef INST_CACHE output [`ICACHE_BLOCK_ADDR_BITS-1:0] ic2memReqAddr_o, // memory read address output ic2memReqValid_o, // memory read enable + output [`ICACHE_NUM_WAYS_LOG-1:0] ic2memReqWay_o, + input [`ICACHE_TAG_BITS-1:0] mem2icTag_i, // tag of the incoming data input [`ICACHE_INDEX_BITS-1:0] mem2icIndex_i, // index of the incoming data input [`ICACHE_BITS_IN_LINE-1:0] mem2icData_i, // requested data @@ -54,7 +56,7 @@ module L1ICache ( input mem2icInv_i, // icache invalidation input [`ICACHE_INDEX_BITS-1:0] mem2icInvInd_i, // icache invalidation index - input [0:0] mem2icInvWay_i, // icache invalidation way (unused) + input [`ICACHE_NUM_WAYS_LOG-1:0] mem2icInvWay_i, // icache invalidation way (unused) input icScratchModeEn_i, // Should ideally be disabled by default input [`ICACHE_INDEX_BITS+`ICACHE_BYTES_IN_LINE_LOG-1:0] icScratchWrAddr_i, @@ -310,6 +312,7 @@ module L1ICache ( .icFlushDone_o (icFlushDone_o), .ic2memReqAddr_o (ic2memReqAddr_o), .ic2memReqValid_o (ic2memReqValid_o), + .ic2memReqWay_o (ic2memReqWay_o), .icScratchWrAddr_i (icScratchWrAddr_i), .icScratchWrEn_i (icScratchWrEn_i ), diff --git a/icache/ICache_controller.sv b/icache/ICache_controller.sv index cb22576..36429dc 100644 --- a/icache/ICache_controller.sv +++ b/icache/ICache_controller.sv @@ -16,6 +16,30 @@ # AnyCore is distributed under the BSD license. *******************************************************************************/ +/* +questions +1) Where would the MSHR in the diagram provided from the CS154 s22 slides? +2) What exactly is scratch mode? 
I think it is A temporary location in memory that allows for something to be saved, what is its purpose however? + mode to not use it as cache but just use it as memory. Benefit for embedded class applications to access it in 1 cycle +3) Don't completely understand what the assign part is doing in terms of syntax around line 128-130 +4) What are all the MSHR miss signals and what do they do? (like missd1, missd2, miss pulse) +5) can you explain a little bit about what's going on in lines 213 to 280 +6) Don't really understand the declaration in line 353-355 +7) what is the assign BIST parts in lines 406-409 +8) +*/ + +/* +CHANGES I'VE MADE SO FAR IN THE CODE + +1) I've changed the size of ICACHE_NUM_LINES and ICACHE_NUM_LINES_LOG to be 4 times smaller. +This is because the amount of tags will now quadruple and ICACHE_TAG_BITS depends on the size of +ICACHE_NUM_LINES. +2) quadrupled the amount of data_array, valid_array, and tag_array +3) also duplicated the cache_data, cache_tag, cache_valid, we will need to add a replacement policy +to figure out what to evict from each subcache +*/ + `timescale 1ns/100ps @@ -32,17 +56,25 @@ module ICache_controller#( `ifdef INST_CACHE // cache-to-memory interface output [`ICACHE_BLOCK_ADDR_BITS-1:0] ic2memReqAddr_o, // memory read address - output reg ic2memReqValid_o, // memory read enable + output reg ic2memReqValid_o, // memory read enable + + //wire this into the tri transducer + //that value comes from roundRobin logic + //tri transducer line 101, replace with ic2memReqWay after adding to module + output logic [`ICACHE_NUM_WAYS_LOG-1:0] ic2memReqWay_o, // memory way // memory-to-cache interface input [`ICACHE_TAG_BITS-1:0] mem2icTag_i, // tag of the incoming datadetermine input [`ICACHE_INDEX_BITS-1:0] mem2icIndex_i, // index of the incoming data + input [`ICACHE_BITS_IN_LINE-1:0] mem2icData_i, // requested data input mem2icRespValid_i, // indicates the requested data is ready input mem2icInv_i, // icache invalidation input 
[`ICACHE_INDEX_BITS-1:0] mem2icInvInd_i, // icache invalidation index - input [0:0] mem2icInvWay_i, // icache invalidation way (unused) + + //now this is supposed to be 2 bits wide + input [`ICACHE_NUM_WAYS_LOG-1:0] mem2icInvWay_i, // icache invalidation way (unused) input [`ICACHE_INDEX_BITS+`ICACHE_BYTES_IN_LINE_LOG-1:0] icScratchWrAddr_i, input icScratchWrEn_i, @@ -67,9 +99,12 @@ module ICache_controller#( // 32 24 16 8 0 // |-------|-------|-------|-------| // ttttttttttiiiiiiiiioo + // ttttttttttttiiiiiiioo // // note: the tag is only 10 bits because the pc will never be higher // than 32'h007fffff for CPU2000 benchmarks + + //we now want to have more tag, and less index bits so 12 tag bits and 7 index bits. //////////////////////////////////////////////////////////// @@ -82,6 +117,10 @@ module ICache_controller#( logic [`ICACHE_BYTES_IN_LINE_LOG-1:0] icScratchWrByte_d1; logic [7:0] icScratchWrData_d1; logic icScratchWrEn_d1; + logic [`ICACHE_NUM_WAYS_LOG-1:0] RoundRobin [`ICACHE_NUM_LINES-1:0]; + logic [`ICACHE_NUM_WAYS_LOG-1:0] lru [`ICACHE_NUM_WAYS-1:0][`ICACHE_NUM_LINES-1:0]; + int misses = 0; + int hits = 0; always_ff @(posedge clk or posedge reset) begin @@ -99,12 +138,14 @@ module ICache_controller#( icScratchWrIndex_d1 <= icScratchWrAddr_i[`ICACHE_INDEX_BITS+`ICACHE_BYTES_IN_LINE_LOG-1:`ICACHE_BYTES_IN_LINE_LOG]; icScratchWrByte_d1 <= icScratchWrAddr_i[`ICACHE_BYTES_IN_LINE_LOG-1:0]; icScratchWrData_d1 <= icScratchWrData_i; - icScratchWrEn_d1 <= icScratchWrEn_i ; - end + icScratchWrEn_d1 <= icScratchWrEn_i; + end end // pc segments ///////////////////////////////////////////// + //PROBABLY NEED TO MODIFY THE SIZE OF THIS WITH THE MACROS + //logic [`SIZE_PC-1:0] pc_i4; logic [`ICACHE_OFFSET_BITS-1:0] pc_offset; logic [`ICACHE_INDEX_BITS-1:0] pc_index; logic [`ICACHE_TAG_BITS-1:0] pc_tag; @@ -114,6 +155,9 @@ module ICache_controller#( logic fetchReq_reg; // the unregistered index is for reading the tag/data array + //DONT REALLY UNDERSTAND THIS PART --> 
DONT NEED TO WORRY ABOUT THIS + //indexing into the vector to pull certain number of bits, will follow if you modify the macros + //assign pc_i4 = pc_i + 5'b10000; assign pc_offset = pc_i[`ICACHE_OFFSET_BITS+`ICACHE_INST_BYTE_OFFSET_LOG-1 : `ICACHE_INST_BYTE_OFFSET_LOG]; assign pc_index = pc_i[`ICACHE_OFFSET_BITS+`ICACHE_INDEX_BITS+`ICACHE_INST_BYTE_OFFSET_LOG-1 : `ICACHE_OFFSET_BITS+`ICACHE_INST_BYTE_OFFSET_LOG]; assign pc_tag = pc_i[`SIZE_PC-1 : `ICACHE_OFFSET_BITS+`ICACHE_INDEX_BITS+`ICACHE_INST_BYTE_OFFSET_LOG]; @@ -128,16 +172,19 @@ module ICache_controller#( end // tag, valid and data read from cache ///////////////////// - logic [`ICACHE_TAG_BITS-1:0] cache_tag; - logic [`ICACHE_BITS_IN_LINE-1:0] cache_data; - logic cache_valid; - + //PROBABLY NEED TO MODIFY THE SIZE OF THIS + logic [`ICACHE_TAG_BITS-1:0] cache_tag [`ICACHE_NUM_WAYS-1:0]; + logic [`ICACHE_BITS_IN_LINE-1:0] cache_data [`ICACHE_NUM_WAYS-1:0]; + logic cache_valid [`ICACHE_NUM_WAYS-1:0]; + // hit detection logic. hits are detected the cycle after fetchReq_i goes high. // hit can stay high for multiple cycles if no new request comes (e.g. 
fetch
   // stalls)
   logic [0:`FETCH_WIDTH-1]              instValid;
   logic [`SIZE_INSTRUCTION-1:0]         inst [0:`FETCH_WIDTH-1];
-  logic                                 hit;
+
+  logic [`ICACHE_NUM_WAYS-1:0]          hit;
+  logic                                 totalHit;
   assign inst_o = inst;
@@ -166,7 +213,7 @@ module ICache_controller#(
   logic                                 miss_pulse;
   logic                                 missUnderMiss;
-  assign miss = ~hit;
+  assign miss = ~totalHit;
   assign icMiss_o = ic2memReqValid_o;
@@ -184,7 +231,102 @@ module ICache_controller#(
       miss_d2 <= miss_d1 & ~fillValid;
     end
   end
-
+
+  // Replacement bookkeeping, kept per cache set (indexed by pc_index).
+  //   RoundRobin[set] : way pointer; ic2memReqWay_o is driven from it.
+  //   lru[pos][set]   : way numbers ordered by recency. Position 0 holds the
+  //                     least recently used way, position NUM_WAYS-1 the most
+  //                     recently used one.
+  //   misses / hits   : counters printed with $display on every update.
+  // Reset is synchronous in this block (the sensitivity list is posedge clk
+  // only). All state is written with nonblocking assignments; the lru shift
+  // reads only pre-edge values, so the result equals a sequential shift.
+  // NOTE(review): the miss branch runs on every clock in which `miss` is
+  // high, so RoundRobin advances once per miss cycle, not once per refill.
+  // NOTE(review): the miss branch reorders lru around mem2icInvWay_i without
+  // qualifying it with fillValid; the patch author raised the same question.
+  always_ff @(posedge clk)
+  begin
+    if (reset)
+    begin
+      int i;
+      int j;
+      for (i = 0; i < `ICACHE_NUM_LINES; i++)
+      begin
+        RoundRobin[i] <= '0;
+        for (j = 0; j < `ICACHE_NUM_WAYS; j++)
+        begin
+          // Initial order: way j sits at recency position j.
+          lru[j][i] <= j;
+        end
+      end
+    end
+    else if (miss)
+    begin
+      int i;
+      int x;
+      misses <= misses + 1;
+      $display("MISSES: %d", misses);
+      RoundRobin[pc_index] <= RoundRobin[pc_index] + 1'b1;
+      // Find the recency position that currently holds mem2icInvWay_i.
+      for (i = 0; i < `ICACHE_NUM_WAYS; i++)
+      begin
+        if (lru[i][pc_index] == mem2icInvWay_i)
+        begin
+          x = i;
+          break;
+        end
+      end
+      // Close the gap at position x and append the way at the most recently
+      // used end: the line being brought in counts as the newest entry.
+      for (i = x; i < `ICACHE_NUM_WAYS - 1; i++)
+      begin
+        lru[i][pc_index] <= lru[i+1][pc_index];
+      end
+      lru[`ICACHE_NUM_WAYS - 1][pc_index] <= mem2icInvWay_i;
+    end
+    else if (totalHit)
+    begin
+      int i;
+      int hitnum;
+      int x;
+      hits <= hits + 1;
+      $display("HITS: %d", hits);
+      // Lowest-numbered way whose hit bit is set; hitnum keeps that value
+      // once the loop breaks.
+      for (hitnum = 0; hitnum < `ICACHE_NUM_WAYS; hitnum++)
+      begin
+        if (hit[hitnum])
+        begin
+          break;
+        end
+      end
+      // The way may have moved inside the list, so look up its position.
+      for (i = 0; i < `ICACHE_NUM_WAYS; i++)
+      begin
+        if (lru[i][pc_index] == hitnum)
+        begin
+          x = i;
+          break;
+        end
+      end
+      // Shift the younger entries down and append the hit way at the most
+      // recently used end; position 0 is left holding the eviction candidate.
+      for (i = x; i < `ICACHE_NUM_WAYS - 1; i++)
+      begin
+        lru[i][pc_index] <= lru[i+1][pc_index];
+      end
+      lru[`ICACHE_NUM_WAYS - 1][pc_index] <= hitnum;
+    end
+  end
+
   assign miss_pulse = miss_d1 & ~miss_d2;
   //assign miss_pulse = fetchReq_i & miss & ~miss_d1;
@@ -274,14 +416,14 @@ module ICache_controller#(
   ////////////////////////////////////////////////////////////
-
+  //can probably ignore this part for the purposes of this project or double check the config
   always_comb
   begin
     int i;
     for (i = 0; i < `FETCH_WIDTH; i++)
       instValid[i] = 1'b0;
-    if (hit)
+    if (totalHit)
     begin
       instValid[0] = 1'b1; // First slot is always valid irrespective of the offset
@@ -324,31 +466,70 @@ module ICache_controller#(
     end
   end
-  logic [(2*`ICACHE_BITS_IN_LINE)-1 : 0]  cache_data_extended;
+  //DO WE NEED TO MODIFY THE SIZE OF THIS AS WELL?
+  // One zero-extended copy of the line read from each way. The upper half is
+  // all zeros, so slot indices that run past the end of the line select zeros.
+  logic [(2*`ICACHE_BITS_IN_LINE)-1 : 0]  cache_data_extended [`ICACHE_NUM_WAYS-1:0];
   // extract the instruction from the cache block
   always_comb
   begin
     int i;
-    cache_data_extended = {{`ICACHE_BITS_IN_LINE{1'b0}},cache_data}; // Like reading two consecutive cache blocks
+    for(i = 0; i < `ICACHE_NUM_WAYS; i++)
+    begin
+      cache_data_extended[i] = {{`ICACHE_BITS_IN_LINE{1'b0}},cache_data[i]};
+    end
+
     for(i = 0;i < `FETCH_WIDTH;i++)
     begin
-      // Instructions going to the pipeline is still 64 bit but in the cache, its 40 bits. Padding with 0s.
-      inst[i] = {24'b0,cache_data_extended[((pc_offset+i)*`SIZE_INSTRUCTION)+`SIZE_INSTRUCTION-1 -: `SIZE_INSTRUCTION]};
+      //Instructions going to the pipeline is still 64 bit but in the cache, its 40 bits. Padding with 0s.
+      int j;
+      // Default value: without it inst[i] is not assigned when no way hits,
+      // which makes this always_comb infer a latch. instValid is only raised
+      // on a hit, so the default is never presented as a valid instruction.
+      inst[i] = '0;
+      // Take the slot from the lowest-numbered way that hits.
+      for(j = 0; j < `ICACHE_NUM_WAYS; j++)
+      begin
+        if (hit[j])
+        begin
+          inst[i] = {24'b0,cache_data_extended[j][((pc_offset+i)*`SIZE_INSTRUCTION)+`SIZE_INSTRUCTION-1 -: `SIZE_INSTRUCTION]};
+          break;
+        end
+      end
     end
   end
-  /* Cache data and tag arrays */
-  logic [`ICACHE_BITS_IN_LINE-1:0]                         data_array [`ICACHE_NUM_LINES-1:0];
-  logic [(`ICACHE_TAG_BITS*`ICACHE_INSTS_IN_LINE)-1:0]tag_array [`ICACHE_NUM_LINES-1:0];
-  logic [`ICACHE_NUM_LINES-1:0]                            valid_array;
+  /* Cache data and tag arrays */
+  // One copy of each array per way; data_array and tag_array are indexed
+  // [way][line]. valid_array holds one packed vector of per-line valid bits
+  // for each way, so it is also addressed as valid_array[way][line].
+  // NOTE(review): a tag_array entry is ICACHE_TAG_BITS*ICACHE_INSTS_IN_LINE
+  // bits wide while cache_tag is ICACHE_TAG_BITS wide - confirm the intended
+  // width.
+  logic [`ICACHE_BITS_IN_LINE-1:0]                     data_array  [`ICACHE_NUM_WAYS-1:0] [`ICACHE_NUM_LINES-1:0];
+  logic [(`ICACHE_TAG_BITS*`ICACHE_INSTS_IN_LINE)-1:0] tag_array   [`ICACHE_NUM_WAYS-1:0] [`ICACHE_NUM_LINES-1:0];
+  logic [`ICACHE_NUM_LINES-1:0]                        valid_array [`ICACHE_NUM_WAYS-1:0];
+  //look into if how we're implementing round robin here is correct/ works and doesn't break the code
+  //duplicate as well and then use cache replacement policy to figure out which ones to extract from 0-3
+  //do we also need to duplicate the pc_index or is that fine?
+  //NEED TO ADD CACHE REPLACEMENT POLICY LET'S START WITH ROUND ROBIN
+
   always_comb
   begin
-    cache_data  = data_array[pc_index];
-    cache_tag   = tag_array[pc_index];
-    cache_valid = valid_array[pc_index];
+    int i;
+    // Drive the request-way output from the round-robin pointer of the
+    // indexed set.
+    ic2memReqWay_o = RoundRobin[pc_index];
+    // TODO(author): decide whether the least recently used way should be
+    // reported instead of the round-robin pointer:
+    //ic2memReqWay_o = lru[0][pc_index];
+
+    // Read the indexed line out of every way.
+    for(i = 0;i < `ICACHE_NUM_WAYS;i++)
+    begin
+      cache_data[i]  = data_array[i][pc_index];
+      cache_tag[i]   = tag_array[i][pc_index];
+      cache_valid[i] = valid_array[i][pc_index];
+    end
+    // Per-way hit. Only the tag/valid compare depends on the way. In scratch
+    // mode, or while mmuException_i is high, the expression is the same for
+    // every i, so all bits of hit are set together; this is why the hit bits
+    // can be seen all-ones at the same time.
+    for(i = 0;i < `ICACHE_NUM_WAYS;i++)
+    begin
+      hit[i] = icScratchModeEn_d1
+                ? fetchReq_i
+                : ((cache_tag[i] == pc_tag) & cache_valid[i] & fetchReq_i) | mmuException_i;
+    end
+
   end
   // A fetch only generates a hit if fetchReq_i is high
@@ -364,9 +545,18 @@ module ICache_controller#(
   // there. Fetch will hit on this garbage data later. Eventually, the exception
   // will be handled at retirement and cause will be fixed. Fetch can happen normally
   // thereafter.
-  assign hit = icScratchModeEn_d1
-                ? fetchReq_i
-                : ((cache_tag == pc_tag) & cache_valid & fetchReq_i) | mmuException_i;
+
+  // totalHit is high when at least one way reports a hit.
+  // TODO(author): add a mux here to find out which way the hit came from.
+  assign totalHit = |hit;
   // Initializes the first 4 lines of the data array to the following BIST sequence.
   // This is only useful in scratch pad mode cause in cache mode, these lines are invalid.
@@ -400,44 +590,92 @@ module ICache_controller#(
   //assign BIST[2] = {`ADD_INST_1,`NOP_INST ,`ADD_INST_2,`TOGGLE_INST_C,`ADD_INST_2 ,`ADD_INST_1,`ADD_INST_2,`ADD_INST_1 };
   //assign BIST[3] = {`NOP_INST ,`NOP_INST ,`JUMP_INST ,`ADD_INST_1 ,`TOGGLE_INST_C,`ADD_INST_2,`ADD_INST_1,`ADD_INST_2 };
+  // Data and tag storage.
+  //  - reset clears every line of every way;
+  //  - a fill writes line fillIndex of the way named by mem2icInvWay_i;
+  //  - scratch-pad writes (and the scratch read below) always use way 0.
+  // Integration notes carried over from the patch author: the fill way is
+  // taken from mem2icInvWay_i (the invalidate-way output of the TRI
+  // transducer, wired through AnyCore_Piton.sv) instead of RoundRobin, and
+  // that signal has to be widened to match the associativity.
+  // NOTE(review): the previous reset loaded BIST[0..3] into lines 0-3 (see
+  // the removed lines); confirm that the scratch-mode BIST image is no
+  // longer needed now that reset writes zeros.
   always_ff @(posedge clk or posedge reset)
   begin
     int i;
+    int j;
     if(reset)
     begin
-      for(i=0 ; i < 4; i++)
-        data_array[i] <= BIST[i];
+      for(i = 0;i < `ICACHE_NUM_WAYS;i++)
+      begin
+        for(j=0 ; j < `ICACHE_NUM_LINES; j++)
+        begin
+          data_array[i][j] <= '0;
+        end
+      end
     end
     else if(fillValid)
     begin
-      data_array[fillIndex] <= fillData;
-      tag_array[fillIndex]  <= fillTag;
+      data_array[mem2icInvWay_i][fillIndex] <= fillData;
+      tag_array[mem2icInvWay_i][fillIndex]  <= fillTag;
     end
     // Load scratch pad from outside
     else if(icScratchWrEn_d1 & icScratchModeEn_d1)
     begin
-      data_array[icScratchWrIndex_d1][(icScratchWrByte_d1*8) +: 8] <= icScratchWrData_d1;
+      data_array[0][icScratchWrIndex_d1][(icScratchWrByte_d1*8) +: 8] <= icScratchWrData_d1;
     end
   end
   // Reading the bytes through the SCRATCH interface
-  assign icScratchRdData_o = data_array[icScratchWrIndex_d1][(icScratchWrByte_d1*8) +: 8];
+  assign icScratchRdData_o = data_array[0][icScratchWrIndex_d1][(icScratchWrByte_d1*8) +: 8];
+// Valid bits, one packed vector per way. Priority within a cycle:
+//   reset / icFlush_i : clear every line of every way;
+//   mem2icInv_i       : clear line mem2icInvInd_i of way mem2icInvWay_i;
+//   fillValid         : set line fillIndex of way mem2icInvWay_i.
+// The invalidation must use mem2icInvInd_i: fillIndex describes an incoming
+// fill and does not name the line being invalidated.
+// TODO(author): test this with a register store/load intensive program.
   always_ff @(posedge clk or posedge reset)
   begin
     if(reset | icFlush_i)
     begin
       int i;
-      for(i = 0; i < `ICACHE_NUM_LINES;i++)
-        valid_array[i] <= 1'b0;
+      for(i = 0;i < `ICACHE_NUM_WAYS;i++)
+      begin
+        valid_array[i] <= '0;
+      end
     end
     else if(mem2icInv_i)
     begin
-      valid_array[mem2icInvInd_i] <= 1'b0;
+      valid_array[mem2icInvWay_i][mem2icInvInd_i] <= 1'b0;
     end
     else if(fillValid)
     begin
-      valid_array[fillIndex] <= 1'b1;
+      valid_array[mem2icInvWay_i][fillIndex] <= 1'b1;
     end
   end
diff --git a/lsu/L1DataCache.sv b/lsu/L1DataCache.sv
index d499e0d..a08ab42 100644
--- a/lsu/L1DataCache.sv
+++ b/lsu/L1DataCache.sv
@@ -36,6 +36,7 @@ module L1DataCache(
     // cache-to-memory interface for Loads
     output [`DCACHE_BLOCK_ADDR_BITS-1:0] dc2memLdAddr_o, // memory read address
     output reg dc2memLdValid_o, // memory read enable
+    output [1:0] dc2memReqWay_o,
     // memory-to-cache interface for Loads
     input [`DCACHE_TAG_BITS-1:0] mem2dcLdTag_i, // tag of the incoming datadetermine
@@ -51,7 +52,7 @@ module L1DataCache(
     input mem2dcInv_i, // dcache invalidation
     input [`DCACHE_INDEX_BITS-1:0] mem2dcInvInd_i, // dcache invalidation index
-    input [0:0] mem2dcInvWay_i, // dcache invalidation way (unused)
+    input [1:0] mem2dcInvWay_i, // dcache invalidation way (unused)
     // memory-to-cache interface for stores
     input mem2dcStComplete_i,
@@ -144,6 +145,7 @@ reg [7:0] stEn; // LOG_SIZE_DATA - 1 = 7
     .dc2memLdAddr_o (dc2memLdAddr_o ), // memory read address
     .dc2memLdValid_o (dc2memLdValid_o ), // memory read enable
+    .dc2memReqWay_o (dc2memReqWay_o ),
     .mem2dcLdTag_i (mem2dcLdTag_i ), // tag of the incoming datadetermine
     .mem2dcLdIndex_i (mem2dcLdIndex_i ), // index of the incoming data
@@ -157,7 +159,7 @@ reg [7:0] stEn; // LOG_SIZE_DATA - 1 = 7
     .mem2dcInv_i, // dcache invalidation
     .mem2dcInvInd_i, // dcache invalidation index
-    .mem2dcInvWay_i, // dcache invalidation way (unusedndex
+    .mem2dcInvWay_i (mem2dcInvWay_i ),
     .mem2dcStComplete_i(mem2dcStComplete_i ),
     .mem2dcStStall_i (mem2dcStStall_i ),
diff --git a/lsu/LDX_path_structured.sv b/lsu/LDX_path_structured.sv
index b163107..5df8d8c 100644
--- a/lsu/LDX_path_structured.sv
+++ b/lsu/LDX_path_structured.sv
@@ -40,6 +40,7 @@ module LDX_path_structured (
     // cache-to-memory interface for Loads
     output
[`DCACHE_BLOCK_ADDR_BITS-1:0] dc2memLdAddr_o, // memory read address output reg dc2memLdValid_o, // memory read enable + output [1:0] dc2memReqWay_o, // memory-to-cache interface for Loads input [`DCACHE_TAG_BITS-1:0] mem2dcLdTag_i, // tag of the incoming datadetermine @@ -55,7 +56,7 @@ module LDX_path_structured ( input mem2dcInv_i, // dcache invalidation input [`DCACHE_INDEX_BITS-1:0] mem2dcInvInd_i, // dcache invalidation index - input [0:0] mem2dcInvWay_i, // dcache invalidation way (unused) + input [1:0] mem2dcInvWay_i, // dcache invalidation way (unused) // memory-to-cache interface for stores input mem2dcStComplete_i, @@ -189,6 +190,7 @@ L1DataCache L1dCache ( .dc2memLdAddr_o (dc2memLdAddr_o ), // memory read address .dc2memLdValid_o (dc2memLdValid_o ), // memory read enable + .dc2memReqWay_o (dc2memReqWay_o ), .mem2dcLdTag_i (mem2dcLdTag_i ), // tag of the incoming datadetermine .mem2dcLdIndex_i (mem2dcLdIndex_i ), // index of the incoming data @@ -202,7 +204,7 @@ L1DataCache L1dCache ( .mem2dcInv_i, // dcache invalidation .mem2dcInvInd_i, // dcache invalidation index - .mem2dcInvWay_i, // dcache invalidation way (unusedndex + .mem2dcInvWay_i (mem2dcInvWay_i ), .mem2dcStComplete_i (mem2dcStComplete_i ), .mem2dcStStall_i (mem2dcStStall_i ), diff --git a/lsu/LSUDatapath.sv b/lsu/LSUDatapath.sv index 8b73eba..df48dad 100644 --- a/lsu/LSUDatapath.sv +++ b/lsu/LSUDatapath.sv @@ -45,6 +45,7 @@ module LSUDatapath ( // cache-to-memory interface for Loads output [`DCACHE_BLOCK_ADDR_BITS-1:0] dc2memLdAddr_o, // memory read address output reg dc2memLdValid_o, // memory read enable + output [1:0] dc2memReqWay_o, // memory-to-cache interface for Loads input [`DCACHE_TAG_BITS-1:0] mem2dcLdTag_i, // tag of the incoming datadetermine @@ -60,7 +61,7 @@ module LSUDatapath ( input mem2dcInv_i, // dcache invalidation input [`DCACHE_INDEX_BITS-1:0] mem2dcInvInd_i, // dcache invalidation index - input [0:0] mem2dcInvWay_i, // dcache invalidation way (unused) + input [1:0] 
mem2dcInvWay_i, // dcache invalidation way (unused) // memory-to-cache interface for stores input mem2dcStComplete_i, @@ -306,6 +307,7 @@ LDX_path_structured ldx_path ( .dc2memLdAddr_o (dc2memLdAddr_o ), // memory read address .dc2memLdValid_o (dc2memLdValid_o ), // memory read enable + .dc2memReqWay_o (dc2memReqWay_o ), .mem2dcLdTag_i (mem2dcLdTag_i ), // tag of the incoming datadetermine .mem2dcLdIndex_i (mem2dcLdIndex_i ), // index of the incoming data @@ -319,7 +321,7 @@ LDX_path_structured ldx_path ( .mem2dcInv_i, // dcache invalidation .mem2dcInvInd_i, // dcache invalidation index - .mem2dcInvWay_i, // dcache invalidation way (unused) + .mem2dcInvWay_i (mem2dcInvWay_i ), .mem2dcStComplete_i (mem2dcStComplete_i ), .mem2dcStStall_i (mem2dcStStall_i ), diff --git a/lsu/LoadStoreUnit.sv b/lsu/LoadStoreUnit.sv index dff33ba..79428ca 100644 --- a/lsu/LoadStoreUnit.sv +++ b/lsu/LoadStoreUnit.sv @@ -116,6 +116,7 @@ module LSU ( // cache-to-memory interface for Loads output [`DCACHE_BLOCK_ADDR_BITS-1:0] dc2memLdAddr_o, // memory read address output reg dc2memLdValid_o, // memory read enable + output [1:0] dc2memReqWay_o, // memory-to-cache interface for Loads input [`DCACHE_TAG_BITS-1:0] mem2dcLdTag_i, // tag of the incoming datadetermine @@ -131,7 +132,7 @@ module LSU ( input mem2dcInv_i, // dcache invalidation input [`DCACHE_INDEX_BITS-1:0] mem2dcInvInd_i, // dcache invalidation index - input [0:0] mem2dcInvWay_i, // dcache invalidation way (unused) + input [1:0] mem2dcInvWay_i, // dcache invalidation way (unused) // memory-to-cache interface for stores input mem2dcStComplete_i, @@ -294,6 +295,7 @@ LSUDatapath datapath ( .dc2memLdAddr_o (dc2memLdAddr_o ), // memory read address .dc2memLdValid_o (dc2memLdValid_o ), // memory read enable + .dc2memReqWay_o (dc2memReqWay_o ), .mem2dcLdTag_i (mem2dcLdTag_i ), // tag of the incoming datadetermine .mem2dcLdIndex_i (mem2dcLdIndex_i ), // index of the incoming data @@ -307,7 +309,7 @@ LSUDatapath datapath ( .mem2dcInv_i, // 
dcache invalidation .mem2dcInvInd_i, // dcache invalidation index - .mem2dcInvWay_i, // dcache invalidation way (unusedndex + .mem2dcInvWay_i (mem2dcInvWay_i ), .mem2dcStComplete_i (mem2dcStComplete_i ), .mem2dcStStall_i (mem2dcStStall_i ),