diff --git a/t1/src/Lane.scala b/t1/src/Lane.scala index 65280267b..6b0790dfa 100644 --- a/t1/src/Lane.scala +++ b/t1/src/Lane.scala @@ -245,9 +245,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ @public val maskRequestToLSU: Bool = IO(Output(Bool())) - @public - val maskUnitResponse: ValidIO[MaskUnitExeResponse] = IO(Flipped(Valid(new MaskUnitExeResponse(parameter)))) - @public val tokenIO: LaneTokenBundle = IO(new LaneTokenBundle) @@ -262,20 +259,20 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ ) @public val vrfReadDataChannel: UInt = IO(Output(UInt(parameter.datapathWidth.W))) - @public - val vrfWriteChannel: DecoupledIO[VRFWriteRequest] = IO( - Flipped( - Decoupled( - new VRFWriteRequest( - parameter.vrfParam.regNumBits, - parameter.vrfOffsetBits, - parameter.instructionIndexBits, - parameter.datapathWidth - ) - ) - ) + + val vrfWriteType: VRFWriteRequest = new VRFWriteRequest( + parameter.vrfParam.regNumBits, + parameter.vrfOffsetBits, + parameter.instructionIndexBits, + parameter.datapathWidth ) + @public + val vrfWriteChannel: DecoupledIO[VRFWriteRequest] = IO(Flipped(Decoupled(vrfWriteType))) + + @public + val writeFromMask: Bool = IO(Input(Bool())) + /** for each instruction in the slot, response to top when instruction is finished in this lane. */ @public val instructionFinished: UInt = IO(Output(UInt(parameter.chainingSize.W))) @@ -371,7 +368,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ */ val vrfWriteArbiter: Vec[DecoupledIO[VRFWriteRequest]] = Wire( Vec( - parameter.chainingSize + 1, + parameter.chainingSize + 2, Decoupled( new VRFWriteRequest( parameter.vrfParam.regNumBits, @@ -382,16 +379,31 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ ) ) ) - val topWriteQueue: DecoupledIO[VRFWriteRequest] = Queue(vrfWriteChannel, 1, flow = true) - vrfWriteArbiter(parameter.chainingSize).valid := topWriteQueue.valid - vrfWriteArbiter(parameter.chainingSize).bits := topWriteQueue.bits - topWriteQueue.ready := vrfWriteArbiter(parameter.chainingSize).ready - val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 3) { i => + val lsuWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, 1, flow = true) + // connect lsuWriteQueue.enq + lsuWriteQueue.enq.valid := vrfWriteChannel.valid && !writeFromMask + lsuWriteQueue.enq.bits := vrfWriteChannel.bits + vrfWriteChannel.ready := writeFromMask || lsuWriteQueue.enq.ready + + val maskWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, parameter.maskUnitVefWriteQueueSize) + // connect maskWriteQueue.enq + maskWriteQueue.enq.valid := vrfWriteChannel.valid && writeFromMask + maskWriteQueue.enq.bits := vrfWriteChannel.bits + + vrfWriteArbiter(parameter.chainingSize).valid := lsuWriteQueue.deq.valid + vrfWriteArbiter(parameter.chainingSize).bits := lsuWriteQueue.deq.bits + lsuWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize).ready + + vrfWriteArbiter(parameter.chainingSize + 1).valid := maskWriteQueue.deq.valid + vrfWriteArbiter(parameter.chainingSize + 1).bits := maskWriteQueue.deq.bits + maskWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize + 1).ready + + val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 4) { i => RegInit(0.U.asTypeOf(vrfWriteArbiter.head.bits)) } - val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 3) { _ => RegInit(false.B) } - val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 3, Bool())) + val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 4) { _ => RegInit(false.B) } + val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 4, Bool())) val afterCheckDequeueFire: Seq[Bool] = afterCheckValid.zip(afterCheckDequeueReady).map { case (v, r) => v && r } /** for each slot, assert when it is asking [[T1]] to change mask */ @@ -590,8 +602,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ mask.enqueue <> stage3EnqWire maskUnitRequest <> mask.maskReq maskRequestToLSU <> mask.maskRequestToLSU - mask.maskUnitResponse := maskUnitResponse tokenIO <> mask.tokenIO + tokenIO.maskResponseRelease := maskWriteQueue.deq.fire mask.dequeue }.getOrElse(stage3EnqWire) stage3.enqueue <> stage3EnqSelect @@ -846,7 +858,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ // It’s been a long time since I selected it. Need pipe val queueBeforeMaskWrite: QueueIO[VRFWriteRequest] = Queue.io(chiselTypeOf(maskedWriteUnit.enqueue.bits), entries = 1, pipe = true) - val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 3).W)) + val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 4).W)) val writeCavitation: UInt = VecInit(allVrfWriteAfterCheck.map(_.mask === 0.U)).asUInt // 处理 rf @@ -1160,14 +1172,15 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ writeReadyForLsu := vrf.writeReadyForLsu vrfReadyToStore := vrf.vrfReadyToStore tokenManager.crossWriteReports.zipWithIndex.foreach { case (rpt, rptIndex) => - rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 1 + rptIndex) - rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1 + rptIndex).instructionIndex + rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 2 + rptIndex) + rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 2 + rptIndex).instructionIndex } // todo: add mask unit write token tokenManager.responseReport.valid := maskUnitRequest.valid tokenManager.responseReport.bits := maskUnitRequest.bits.index - tokenManager.responseFeedbackReport.valid := maskUnitResponse.valid - tokenManager.responseFeedbackReport.bits := maskUnitResponse.bits.index + // todo: delete feedback token + tokenManager.responseFeedbackReport.valid := vrfWriteChannel.fire && writeFromMask + tokenManager.responseFeedbackReport.bits := vrfWriteChannel.bits.instructionIndex val instInSlot: UInt = slotControl .zip(slotOccupied) .map { case (slotState, occupied) => @@ -1196,9 +1209,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[ tokenManager.topWriteEnq.valid := vrfWriteChannel.fire tokenManager.topWriteEnq.bits := vrfWriteChannel.bits.instructionIndex + tokenManager.fromMask := writeFromMask + + tokenManager.lsuWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize) + tokenManager.lsuWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex - tokenManager.topWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize) - tokenManager.topWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex + tokenManager.maskWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize + 1) + tokenManager.maskWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1).instructionIndex tokenManager.maskUnitLastReport := lsuLastReport diff --git a/t1/src/T1.scala b/t1/src/T1.scala index b1a718a96..5e6e7ee8c 100644 --- a/t1/src/T1.scala +++ b/t1/src/T1.scala @@ -335,7 +335,7 @@ class T1Probe(parameter: T1Parameter) extends Bundle { val requestReg: ValidIO[InstructionPipeBundle] = ValidIO(new InstructionPipeBundle(parameter)) val requestRegReady: Bool = Bool() // write queue enq for mask unit - val writeQueueEnq: ValidIO[UInt] = Valid(UInt(parameter.instructionIndexBits.W)) + val writeQueueEnqVec: Vec[ValidIO[UInt]] = Vec(parameter.laneNumber, Valid(UInt(parameter.instructionIndexBits.W))) val writeQueueEnqMask: UInt = UInt((parameter.datapathWidth / 8).W) // mask unit instruction valid val instructionValid: UInt = UInt((parameter.chainingSize * 2).W) @@ -763,10 +763,13 @@ class T1(val parameter: T1Parameter) maskUnit.readResult(index) := lane.vrfReadDataChannel lsu.vrfReadResults(index) := lane.vrfReadDataChannel + val maskTryToWrite = maskUnit.exeResp(index) // lsu & mask unit write lane - lane.vrfWriteChannel.valid := vrfWrite(index).valid - lane.vrfWriteChannel.bits := vrfWrite(index).bits - vrfWrite(index).ready := lane.vrfWriteChannel.ready + // Mask write has absolute priority because it has a token + lane.vrfWriteChannel.valid := vrfWrite(index).valid || maskTryToWrite.valid + lane.vrfWriteChannel.bits := Mux(maskTryToWrite.valid, maskTryToWrite.bits, vrfWrite(index).bits) + vrfWrite(index).ready := lane.vrfWriteChannel.ready && !maskTryToWrite.valid + lane.writeFromMask := maskTryToWrite.valid lsu.offsetReadResult(index).valid := lane.maskUnitRequest.valid && lane.maskRequestToLSU lsu.offsetReadResult(index).bits := lane.maskUnitRequest.bits.source2 @@ -843,9 +846,7 @@ class T1(val parameter: T1Parameter) maskInput.valid := lane.maskUnitRequest.valid && !lane.maskRequestToLSU maskInput.bits := lane.maskUnitRequest.bits } - maskUnit.exeResp.zip(laneVec).foreach { case (maskOutput, lane) => - lane.maskUnitResponse <> maskOutput - } + maskUnit.tokenIO.zip(laneVec).zipWithIndex.foreach { case ((token, lane), index) => token.maskResponseRelease := lane.tokenIO.maskResponseRelease lane.tokenIO.maskRequestRelease := token.maskRequestRelease || lsu.tokenIO.offsetGroupRelease(index) @@ -965,10 +966,10 @@ class T1(val parameter: T1Parameter) probeWire.requestReg := requestReg probeWire.requestRegReady := requestRegDequeue.ready // maskUnitWrite maskUnitWriteReady - // todo - probeWire.writeQueueEnq.valid := DontCare - probeWire.writeQueueEnq.bits := DontCare - probeWire.writeQueueEnqMask := DontCare + probeWire.writeQueueEnqVec.zip(maskUnit.exeResp).foreach {case (probe, write) => + probe.valid := write.valid && write.bits.mask.orR + probe.bits := write.bits.instructionIndex + } probeWire.instructionValid := maskAnd( !slots.last.state.wMaskUnitLast && !slots.last.state.idle, indexToOH(slots.last.record.instructionIndex, parameter.chainingSize * 2) diff --git a/t1/src/laneStage/MaskExchangeUnit.scala b/t1/src/laneStage/MaskExchangeUnit.scala index f340e8eb1..8067ac351 100644 --- a/t1/src/laneStage/MaskExchangeUnit.scala +++ b/t1/src/laneStage/MaskExchangeUnit.scala @@ -8,7 +8,6 @@ import chisel3.experimental.hierarchy.{instantiable, public} import chisel3.util._ import org.chipsalliance.t1.rtl._ import org.chipsalliance.t1.rtl.decoder.Decoder -import org.chipsalliance.dwbb.stdlib.queue.{Queue, QueueIO} @instantiable class MaskExchangeUnit(parameter: LaneParameter) extends Module { @@ -26,15 +25,9 @@ class MaskExchangeUnit(parameter: LaneParameter) extends Module { @public val maskRequestToLSU: Bool = IO(Output(Bool())) - @public - val maskUnitResponse: ValidIO[MaskUnitExeResponse] = IO(Flipped(Valid(new MaskUnitExeResponse(parameter)))) - @public val tokenIO: LaneTokenBundle = IO(new LaneTokenBundle) - val maskUnitWriteQueue: QueueIO[MaskUnitExeResponse] = - Queue.io(new MaskUnitExeResponse(parameter), parameter.maskUnitVefWriteQueueSize) - // todo: sSendResponse -> sendResponse val enqIsMaskRequest: Bool = !enqueue.bits.sSendResponse // not maskUnit && not send out @@ -59,27 +52,10 @@ class MaskExchangeUnit(parameter: LaneParameter) extends Module { maskRequestToLSU := enqueue.bits.loadStore - // type change MaskUnitExeResponse -> LaneStage3Enqueue - val maskUnitResponsePipeType: LaneStage3Enqueue = Wire(chiselTypeOf(enqueue.bits)) - maskUnitResponsePipeType := DontCare - maskUnitResponsePipeType.groupCounter := maskUnitWriteQueue.deq.bits.writeData.groupCounter - maskUnitResponsePipeType.data := maskUnitWriteQueue.deq.bits.writeData.data - maskUnitResponsePipeType.mask := maskUnitWriteQueue.deq.bits.writeData.mask - maskUnitResponsePipeType.vd := maskUnitWriteQueue.deq.bits.writeData.vd - maskUnitResponsePipeType.instructionIndex := maskUnitWriteQueue.deq.bits.index - maskUnitResponsePipeType.ffoByOtherLanes := maskUnitWriteQueue.deq.bits.ffoByOther - maskUnitResponsePipeType.pipeData := maskUnitWriteQueue.deq.bits.pipeData - - maskUnitWriteQueue.enq.valid := maskUnitResponse.valid - maskUnitWriteQueue.enq.bits := maskUnitResponse.bits - - val enqWantToSend: Bool = enqueue.valid && enqSendToDeq val maskRequestEnqReady: Bool = !enqIsMaskRequest || maskRequestAllow - dequeue.valid := (enqueue.valid && enqSendToDeq) || maskUnitWriteQueue.deq.valid - dequeue.bits := Mux(enqWantToSend, enqueue.bits, maskUnitResponsePipeType) + dequeue.valid := enqueue.valid && enqSendToDeq + dequeue.bits := enqueue.bits enqueue.ready := Mux(enqSendToDeq, dequeue.ready, maskRequestEnqReady) - maskUnitWriteQueue.deq.ready := dequeue.ready && !enqWantToSend - tokenIO.maskResponseRelease := maskUnitWriteQueue.deq.fire - + tokenIO.maskResponseRelease := DontCare } diff --git a/t1/src/laneStage/SlotTokenManager.scala b/t1/src/laneStage/SlotTokenManager.scala index 37da14da5..1734fee93 100644 --- a/t1/src/laneStage/SlotTokenManager.scala +++ b/t1/src/laneStage/SlotTokenManager.scala @@ -94,7 +94,13 @@ class SlotTokenManager(parameter: LaneParameter) extends Module { val topWriteEnq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) @public - val topWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) + val fromMask: Bool = IO(Input(Bool())) + + @public + val lsuWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) + + @public + val maskWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W)))) @public val instructionValid: UInt = IO(Output(UInt(parameter.chainingSize.W))) @@ -188,15 +194,10 @@ class SlotTokenManager(parameter: LaneParameter) extends Module { val responseDoDeq: UInt = maskAnd(responseReport.valid, indexToOH(responseReport.bits, parameter.chainingSize)).asUInt - val feedbackIndexSelect = Mux( - responseIndexQueue.deq.valid, - responseIndexQueue.deq.bits, - responseFeedbackReport.bits - ) val feedbackDoDeq: UInt = maskAnd(responseFeedbackReport.valid, indexToOH(responseFeedbackReport.bits, parameter.chainingSize)).asUInt - writeEnqSelect := writeDoEnq | feedbackDoDeq + writeEnqSelect := writeDoEnq val pendingResponse = tokenUpdate(responseToken, responseDoEnq, responseDoDeq) // todo: Precise feedback @@ -217,16 +218,25 @@ class SlotTokenManager(parameter: LaneParameter) extends Module { val instructionInWritePipe: UInt = tokenUpdate(writePipeToken, writePipeEnq, writePipeDeq) - // top write token - val topWriteToken: Seq[UInt] = Seq.tabulate(parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W))) - val topWriteDoEnq: UInt = - maskAnd(topWriteEnq.valid, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt + // lsu & mask write token + val lsuWriteToken: Seq[UInt] = Seq.tabulate(parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W))) + val maskWriteToken: Seq[UInt] = Seq.tabulate(parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W))) + + val lsuWriteDoEnq: UInt = + maskAnd(topWriteEnq.valid && !fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt + + val maskWriteDoEnq: UInt = + maskAnd(topWriteEnq.valid && fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt + + val lsuWriteDoDeq: UInt = + maskAnd(lsuWriteDeq.valid, indexToOH(lsuWriteDeq.bits, parameter.chainingSize)).asUInt - val topWriteDoDeq: UInt = - maskAnd(topWriteDeq.valid, indexToOH(topWriteDeq.bits, parameter.chainingSize)).asUInt + val maskWriteDoDeq: UInt = + maskAnd(maskWriteDeq.valid, indexToOH(maskWriteDeq.bits, parameter.chainingSize)).asUInt - val instructionInTopWritePipe = tokenUpdate(topWriteToken, topWriteDoEnq, topWriteDoDeq) + val lsuInTopWrite = tokenUpdate(lsuWriteToken, lsuWriteDoEnq, lsuWriteDoDeq) + val maskInTopWrite = tokenUpdate(maskWriteToken, maskWriteDoEnq, maskWriteDoDeq) - dataInWritePipe := instructionInWritePipe | instructionInTopWritePipe + dataInWritePipe := instructionInWritePipe | lsuInTopWrite | maskInTopWrite instructionValid := dataInWritePipe | instructionInSlot } diff --git a/t1/src/mask/MaskUnit.scala b/t1/src/mask/MaskUnit.scala index e9500f504..61a8145e6 100644 --- a/t1/src/mask/MaskUnit.scala +++ b/t1/src/mask/MaskUnit.scala @@ -50,8 +50,15 @@ class MaskUnit(parameter: T1Parameter) extends Module { } @public - val exeResp: Seq[ValidIO[MaskUnitExeResponse]] = Seq.tabulate(parameter.laneNumber) { _ => - IO(Valid(new MaskUnitExeResponse(parameter.laneParam))) + val exeResp: Seq[ValidIO[VRFWriteRequest]] = Seq.tabulate(parameter.laneNumber) { _ => + IO(Valid( + new VRFWriteRequest( + parameter.vrfParam.regNumBits, + parameter.laneParam.vrfOffsetBits, + parameter.instructionIndexBits, + parameter.datapathWidth + ) + )) } @public @@ -1081,8 +1088,15 @@ class MaskUnit(parameter: T1Parameter) extends Module { val writePort = exeResp(index) queue.deq.ready := !tokenCounter.asBools.last writePort.valid := tokenAllow - writePort.bits := queue.deq.bits - writePort.bits.writeData.vd := instReg.vd + writePort.bits.last := DontCare + writePort.bits.instructionIndex := instReg.instructionIndex + writePort.bits.data := Mux(queue.deq.bits.ffoByOther, queue.deq.bits.pipeData, queue.deq.bits.writeData.data) + writePort.bits.mask := queue.deq.bits.writeData.mask + writePort.bits.vd := instReg.vd + queue.deq.bits.writeData.groupCounter( + parameter.laneParam.groupNumberBits - 1, + parameter.laneParam.vrfOffsetBits + ) + writePort.bits.offset := queue.deq.bits.writeData.groupCounter } waiteStageDeqReady := writeQueue.zipWithIndex.map { case (queue, index) => !WillWriteLane(index) || queue.enq.ready diff --git a/t1/src/vrf/VRF.scala b/t1/src/vrf/VRF.scala index 2b8aee7b6..295e0cdc8 100644 --- a/t1/src/vrf/VRF.scala +++ b/t1/src/vrf/VRF.scala @@ -202,7 +202,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar @public val writeCheck: Vec[LSUWriteCheck] = IO( Vec( - parameter.chainingSize + 3, + parameter.chainingSize + 4, Input( new LSUWriteCheck( parameter.regNumBits, @@ -214,7 +214,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar ) @public - val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 3, Output(Bool()))) + val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 4, Output(Bool()))) /** when instruction is fired, record it in the VRF for chaining. */ @public