Skip to content

Commit

Permalink
[rtl] fix waw.
Browse files Browse the repository at this point in the history
  • Loading branch information
qinjun-li committed Nov 14, 2024
1 parent d939d08 commit 43a260a
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 90 deletions.
79 changes: 48 additions & 31 deletions t1/src/Lane.scala
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,6 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
@public
val maskRequestToLSU: Bool = IO(Output(Bool()))

@public
val maskUnitResponse: ValidIO[MaskUnitExeResponse] = IO(Flipped(Valid(new MaskUnitExeResponse(parameter))))

@public
val tokenIO: LaneTokenBundle = IO(new LaneTokenBundle)

Expand All @@ -262,20 +259,20 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
)
@public
val vrfReadDataChannel: UInt = IO(Output(UInt(parameter.datapathWidth.W)))
@public
val vrfWriteChannel: DecoupledIO[VRFWriteRequest] = IO(
Flipped(
Decoupled(
new VRFWriteRequest(
parameter.vrfParam.regNumBits,
parameter.vrfOffsetBits,
parameter.instructionIndexBits,
parameter.datapathWidth
)
)
)

val vrfWriteType: VRFWriteRequest = new VRFWriteRequest(
parameter.vrfParam.regNumBits,
parameter.vrfOffsetBits,
parameter.instructionIndexBits,
parameter.datapathWidth
)

@public
val vrfWriteChannel: DecoupledIO[VRFWriteRequest] = IO(Flipped(Decoupled(vrfWriteType)))

@public
val writeFromMask: Bool = IO(Input(Bool()))

/** For each instruction in the slot, respond to top when the instruction has finished in this lane. */
@public
val instructionFinished: UInt = IO(Output(UInt(parameter.chainingSize.W)))
Expand Down Expand Up @@ -371,7 +368,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
*/
val vrfWriteArbiter: Vec[DecoupledIO[VRFWriteRequest]] = Wire(
Vec(
parameter.chainingSize + 1,
parameter.chainingSize + 2,
Decoupled(
new VRFWriteRequest(
parameter.vrfParam.regNumBits,
Expand All @@ -382,16 +379,31 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
)
)
)
val topWriteQueue: DecoupledIO[VRFWriteRequest] = Queue(vrfWriteChannel, 1, flow = true)
vrfWriteArbiter(parameter.chainingSize).valid := topWriteQueue.valid
vrfWriteArbiter(parameter.chainingSize).bits := topWriteQueue.bits
topWriteQueue.ready := vrfWriteArbiter(parameter.chainingSize).ready

val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 3) { i =>
val lsuWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, 1, flow = true)
// connect lsuWriteQueue.enq
lsuWriteQueue.enq.valid := vrfWriteChannel.valid && !writeFromMask
lsuWriteQueue.enq.bits := vrfWriteChannel.bits
vrfWriteChannel.ready := writeFromMask || lsuWriteQueue.enq.ready

val maskWriteQueue: QueueIO[VRFWriteRequest] = Queue.io(vrfWriteType, parameter.maskUnitVefWriteQueueSize)
// connect maskWriteQueue.enq
maskWriteQueue.enq.valid := vrfWriteChannel.valid && writeFromMask
maskWriteQueue.enq.bits := vrfWriteChannel.bits

vrfWriteArbiter(parameter.chainingSize).valid := lsuWriteQueue.deq.valid
vrfWriteArbiter(parameter.chainingSize).bits := lsuWriteQueue.deq.bits
lsuWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize).ready

vrfWriteArbiter(parameter.chainingSize + 1).valid := maskWriteQueue.deq.valid
vrfWriteArbiter(parameter.chainingSize + 1).bits := maskWriteQueue.deq.bits
maskWriteQueue.deq.ready := vrfWriteArbiter(parameter.chainingSize + 1).ready

val allVrfWriteAfterCheck: Seq[VRFWriteRequest] = Seq.tabulate(parameter.chainingSize + 4) { i =>
RegInit(0.U.asTypeOf(vrfWriteArbiter.head.bits))
}
val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 3) { _ => RegInit(false.B) }
val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 3, Bool()))
val afterCheckValid: Seq[Bool] = Seq.tabulate(parameter.chainingSize + 4) { _ => RegInit(false.B) }
val afterCheckDequeueReady: Vec[Bool] = Wire(Vec(parameter.chainingSize + 4, Bool()))
val afterCheckDequeueFire: Seq[Bool] = afterCheckValid.zip(afterCheckDequeueReady).map { case (v, r) => v && r }

/** for each slot, assert when it is asking [[T1]] to change mask */
Expand Down Expand Up @@ -590,8 +602,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
mask.enqueue <> stage3EnqWire
maskUnitRequest <> mask.maskReq
maskRequestToLSU <> mask.maskRequestToLSU
mask.maskUnitResponse := maskUnitResponse
tokenIO <> mask.tokenIO
tokenIO.maskResponseRelease := maskWriteQueue.deq.fire
mask.dequeue
}.getOrElse(stage3EnqWire)
stage3.enqueue <> stage3EnqSelect
Expand Down Expand Up @@ -846,7 +858,7 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
// The write-selection path is already long (timing), so a pipe stage is needed here.
val queueBeforeMaskWrite: QueueIO[VRFWriteRequest] =
Queue.io(chiselTypeOf(maskedWriteUnit.enqueue.bits), entries = 1, pipe = true)
val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 3).W))
val writeSelect: UInt = Wire(UInt((parameter.chainingSize + 4).W))
val writeCavitation: UInt = VecInit(allVrfWriteAfterCheck.map(_.mask === 0.U)).asUInt

// handle rf
Expand Down Expand Up @@ -1160,14 +1172,15 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
writeReadyForLsu := vrf.writeReadyForLsu
vrfReadyToStore := vrf.vrfReadyToStore
tokenManager.crossWriteReports.zipWithIndex.foreach { case (rpt, rptIndex) =>
rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 1 + rptIndex)
rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1 + rptIndex).instructionIndex
rpt.valid := afterCheckDequeueFire(parameter.chainingSize + 2 + rptIndex)
rpt.bits := allVrfWriteAfterCheck(parameter.chainingSize + 2 + rptIndex).instructionIndex
}
// todo: add mask unit write token
tokenManager.responseReport.valid := maskUnitRequest.valid
tokenManager.responseReport.bits := maskUnitRequest.bits.index
tokenManager.responseFeedbackReport.valid := maskUnitResponse.valid
tokenManager.responseFeedbackReport.bits := maskUnitResponse.bits.index
// todo: delete feedback token
tokenManager.responseFeedbackReport.valid := vrfWriteChannel.fire && writeFromMask
tokenManager.responseFeedbackReport.bits := vrfWriteChannel.bits.instructionIndex
val instInSlot: UInt = slotControl
.zip(slotOccupied)
.map { case (slotState, occupied) =>
Expand Down Expand Up @@ -1196,9 +1209,13 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[

tokenManager.topWriteEnq.valid := vrfWriteChannel.fire
tokenManager.topWriteEnq.bits := vrfWriteChannel.bits.instructionIndex
tokenManager.fromMask := writeFromMask

tokenManager.lsuWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize)
tokenManager.lsuWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex

tokenManager.topWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize)
tokenManager.topWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize).instructionIndex
tokenManager.maskWriteDeq.valid := afterCheckDequeueFire(parameter.chainingSize + 1)
tokenManager.maskWriteDeq.bits := allVrfWriteAfterCheck(parameter.chainingSize + 1).instructionIndex

tokenManager.maskUnitLastReport := lsuLastReport

Expand Down
23 changes: 12 additions & 11 deletions t1/src/T1.scala
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ class T1Probe(parameter: T1Parameter) extends Bundle {
val requestReg: ValidIO[InstructionPipeBundle] = ValidIO(new InstructionPipeBundle(parameter))
val requestRegReady: Bool = Bool()
// write queue enq for mask unit
val writeQueueEnq: ValidIO[UInt] = Valid(UInt(parameter.instructionIndexBits.W))
val writeQueueEnqVec: Vec[ValidIO[UInt]] = Vec(parameter.laneNumber, Valid(UInt(parameter.instructionIndexBits.W)))
val writeQueueEnqMask: UInt = UInt((parameter.datapathWidth / 8).W)
// mask unit instruction valid
val instructionValid: UInt = UInt((parameter.chainingSize * 2).W)
Expand Down Expand Up @@ -763,10 +763,13 @@ class T1(val parameter: T1Parameter)
maskUnit.readResult(index) := lane.vrfReadDataChannel
lsu.vrfReadResults(index) := lane.vrfReadDataChannel

val maskTryToWrite = maskUnit.exeResp(index)
// lsu & mask unit write lane
lane.vrfWriteChannel.valid := vrfWrite(index).valid
lane.vrfWriteChannel.bits := vrfWrite(index).bits
vrfWrite(index).ready := lane.vrfWriteChannel.ready
// Mask write has absolute priority because it has a token
lane.vrfWriteChannel.valid := vrfWrite(index).valid || maskTryToWrite.valid
lane.vrfWriteChannel.bits := Mux(maskTryToWrite.valid, maskTryToWrite.bits, vrfWrite(index).bits)
vrfWrite(index).ready := lane.vrfWriteChannel.ready && !maskTryToWrite.valid
lane.writeFromMask := maskTryToWrite.valid

lsu.offsetReadResult(index).valid := lane.maskUnitRequest.valid && lane.maskRequestToLSU
lsu.offsetReadResult(index).bits := lane.maskUnitRequest.bits.source2
Expand Down Expand Up @@ -843,9 +846,7 @@ class T1(val parameter: T1Parameter)
maskInput.valid := lane.maskUnitRequest.valid && !lane.maskRequestToLSU
maskInput.bits := lane.maskUnitRequest.bits
}
maskUnit.exeResp.zip(laneVec).foreach { case (maskOutput, lane) =>
lane.maskUnitResponse <> maskOutput
}

maskUnit.tokenIO.zip(laneVec).zipWithIndex.foreach { case ((token, lane), index) =>
token.maskResponseRelease := lane.tokenIO.maskResponseRelease
lane.tokenIO.maskRequestRelease := token.maskRequestRelease || lsu.tokenIO.offsetGroupRelease(index)
Expand Down Expand Up @@ -965,10 +966,10 @@ class T1(val parameter: T1Parameter)
probeWire.requestReg := requestReg
probeWire.requestRegReady := requestRegDequeue.ready
// maskUnitWrite maskUnitWriteReady
// todo
probeWire.writeQueueEnq.valid := DontCare
probeWire.writeQueueEnq.bits := DontCare
probeWire.writeQueueEnqMask := DontCare
probeWire.writeQueueEnqVec.zip(maskUnit.exeResp).foreach {case (probe, write) =>
probe.valid := write.valid && write.bits.mask.orR
probe.bits := write.bits.instructionIndex
}
probeWire.instructionValid := maskAnd(
!slots.last.state.wMaskUnitLast && !slots.last.state.idle,
indexToOH(slots.last.record.instructionIndex, parameter.chainingSize * 2)
Expand Down
30 changes: 3 additions & 27 deletions t1/src/laneStage/MaskExchangeUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import chisel3.experimental.hierarchy.{instantiable, public}
import chisel3.util._
import org.chipsalliance.t1.rtl._
import org.chipsalliance.t1.rtl.decoder.Decoder
import org.chipsalliance.dwbb.stdlib.queue.{Queue, QueueIO}

@instantiable
class MaskExchangeUnit(parameter: LaneParameter) extends Module {
Expand All @@ -26,15 +25,9 @@ class MaskExchangeUnit(parameter: LaneParameter) extends Module {
@public
val maskRequestToLSU: Bool = IO(Output(Bool()))

@public
val maskUnitResponse: ValidIO[MaskUnitExeResponse] = IO(Flipped(Valid(new MaskUnitExeResponse(parameter))))

@public
val tokenIO: LaneTokenBundle = IO(new LaneTokenBundle)

val maskUnitWriteQueue: QueueIO[MaskUnitExeResponse] =
Queue.io(new MaskUnitExeResponse(parameter), parameter.maskUnitVefWriteQueueSize)

// todo: sSendResponse -> sendResponse
val enqIsMaskRequest: Bool = !enqueue.bits.sSendResponse
// not maskUnit && not send out
Expand All @@ -59,27 +52,10 @@ class MaskExchangeUnit(parameter: LaneParameter) extends Module {

maskRequestToLSU := enqueue.bits.loadStore

// type change MaskUnitExeResponse -> LaneStage3Enqueue
val maskUnitResponsePipeType: LaneStage3Enqueue = Wire(chiselTypeOf(enqueue.bits))
maskUnitResponsePipeType := DontCare
maskUnitResponsePipeType.groupCounter := maskUnitWriteQueue.deq.bits.writeData.groupCounter
maskUnitResponsePipeType.data := maskUnitWriteQueue.deq.bits.writeData.data
maskUnitResponsePipeType.mask := maskUnitWriteQueue.deq.bits.writeData.mask
maskUnitResponsePipeType.vd := maskUnitWriteQueue.deq.bits.writeData.vd
maskUnitResponsePipeType.instructionIndex := maskUnitWriteQueue.deq.bits.index
maskUnitResponsePipeType.ffoByOtherLanes := maskUnitWriteQueue.deq.bits.ffoByOther
maskUnitResponsePipeType.pipeData := maskUnitWriteQueue.deq.bits.pipeData

maskUnitWriteQueue.enq.valid := maskUnitResponse.valid
maskUnitWriteQueue.enq.bits := maskUnitResponse.bits

val enqWantToSend: Bool = enqueue.valid && enqSendToDeq
val maskRequestEnqReady: Bool = !enqIsMaskRequest || maskRequestAllow

dequeue.valid := (enqueue.valid && enqSendToDeq) || maskUnitWriteQueue.deq.valid
dequeue.bits := Mux(enqWantToSend, enqueue.bits, maskUnitResponsePipeType)
dequeue.valid := enqueue.valid && enqSendToDeq
dequeue.bits := enqueue.bits
enqueue.ready := Mux(enqSendToDeq, dequeue.ready, maskRequestEnqReady)
maskUnitWriteQueue.deq.ready := dequeue.ready && !enqWantToSend
tokenIO.maskResponseRelease := maskUnitWriteQueue.deq.fire

tokenIO.maskResponseRelease := DontCare
}
40 changes: 25 additions & 15 deletions t1/src/laneStage/SlotTokenManager.scala
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,13 @@ class SlotTokenManager(parameter: LaneParameter) extends Module {
val topWriteEnq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W))))

@public
val topWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W))))
val fromMask: Bool = IO(Input(Bool()))

@public
val lsuWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W))))

@public
val maskWriteDeq: ValidIO[UInt] = IO(Flipped(Valid(UInt(parameter.instructionIndexBits.W))))

@public
val instructionValid: UInt = IO(Output(UInt(parameter.chainingSize.W)))
Expand Down Expand Up @@ -188,15 +194,10 @@ class SlotTokenManager(parameter: LaneParameter) extends Module {
val responseDoDeq: UInt =
maskAnd(responseReport.valid, indexToOH(responseReport.bits, parameter.chainingSize)).asUInt

val feedbackIndexSelect = Mux(
responseIndexQueue.deq.valid,
responseIndexQueue.deq.bits,
responseFeedbackReport.bits
)
val feedbackDoDeq: UInt =
maskAnd(responseFeedbackReport.valid, indexToOH(responseFeedbackReport.bits, parameter.chainingSize)).asUInt

writeEnqSelect := writeDoEnq | feedbackDoDeq
writeEnqSelect := writeDoEnq

val pendingResponse = tokenUpdate(responseToken, responseDoEnq, responseDoDeq)
// todo: Precise feedback
Expand All @@ -217,16 +218,25 @@ class SlotTokenManager(parameter: LaneParameter) extends Module {

val instructionInWritePipe: UInt = tokenUpdate(writePipeToken, writePipeEnq, writePipeDeq)

// top write token
val topWriteToken: Seq[UInt] = Seq.tabulate(parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W)))
val topWriteDoEnq: UInt =
maskAnd(topWriteEnq.valid, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt
// lsu & mask write token
val lsuWriteToken: Seq[UInt] = Seq.tabulate(parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W)))
val maskWriteToken: Seq[UInt] = Seq.tabulate(parameter.chainingSize)(_ => RegInit(0.U(tokenWith.W)))

val lsuWriteDoEnq: UInt =
maskAnd(topWriteEnq.valid && !fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt

val maskWriteDoEnq: UInt =
maskAnd(topWriteEnq.valid && fromMask, indexToOH(topWriteEnq.bits, parameter.chainingSize)).asUInt

val lsuWriteDoDeq: UInt =
maskAnd(lsuWriteDeq.valid, indexToOH(lsuWriteDeq.bits, parameter.chainingSize)).asUInt

val topWriteDoDeq: UInt =
maskAnd(topWriteDeq.valid, indexToOH(topWriteDeq.bits, parameter.chainingSize)).asUInt
val maskWriteDoDeq: UInt =
maskAnd(maskWriteDeq.valid, indexToOH(maskWriteDeq.bits, parameter.chainingSize)).asUInt

val instructionInTopWritePipe = tokenUpdate(topWriteToken, topWriteDoEnq, topWriteDoDeq)
val lsuInTopWrite = tokenUpdate(lsuWriteToken, lsuWriteDoEnq, lsuWriteDoDeq)
val maskInTopWrite = tokenUpdate(maskWriteToken, maskWriteDoEnq, maskWriteDoDeq)

dataInWritePipe := instructionInWritePipe | instructionInTopWritePipe
dataInWritePipe := instructionInWritePipe | lsuInTopWrite | maskInTopWrite
instructionValid := dataInWritePipe | instructionInSlot
}
22 changes: 18 additions & 4 deletions t1/src/mask/MaskUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,15 @@ class MaskUnit(parameter: T1Parameter) extends Module {
}

@public
val exeResp: Seq[ValidIO[MaskUnitExeResponse]] = Seq.tabulate(parameter.laneNumber) { _ =>
IO(Valid(new MaskUnitExeResponse(parameter.laneParam)))
val exeResp: Seq[ValidIO[VRFWriteRequest]] = Seq.tabulate(parameter.laneNumber) { _ =>
IO(Valid(
new VRFWriteRequest(
parameter.vrfParam.regNumBits,
parameter.laneParam.vrfOffsetBits,
parameter.instructionIndexBits,
parameter.datapathWidth
)
))
}

@public
Expand Down Expand Up @@ -1081,8 +1088,15 @@ class MaskUnit(parameter: T1Parameter) extends Module {
val writePort = exeResp(index)
queue.deq.ready := !tokenCounter.asBools.last
writePort.valid := tokenAllow
writePort.bits := queue.deq.bits
writePort.bits.writeData.vd := instReg.vd
writePort.bits.last := DontCare
writePort.bits.instructionIndex := instReg.instructionIndex
writePort.bits.data := Mux(queue.deq.bits.ffoByOther, queue.deq.bits.pipeData, queue.deq.bits.writeData.data)
writePort.bits.mask := queue.deq.bits.writeData.mask
writePort.bits.vd := instReg.vd + queue.deq.bits.writeData.groupCounter(
parameter.laneParam.groupNumberBits - 1,
parameter.laneParam.vrfOffsetBits
)
writePort.bits.offset := queue.deq.bits.writeData.groupCounter
}
waiteStageDeqReady := writeQueue.zipWithIndex.map { case (queue, index) =>
!WillWriteLane(index) || queue.enq.ready
Expand Down
4 changes: 2 additions & 2 deletions t1/src/vrf/VRF.scala
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar
@public
val writeCheck: Vec[LSUWriteCheck] = IO(
Vec(
parameter.chainingSize + 3,
parameter.chainingSize + 4,
Input(
new LSUWriteCheck(
parameter.regNumBits,
Expand All @@ -214,7 +214,7 @@ class VRF(val parameter: VRFParam) extends Module with SerializableModule[VRFPar
)

@public
val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 3, Output(Bool())))
val writeAllow: Vec[Bool] = IO(Vec(parameter.chainingSize + 4, Output(Bool())))

/** when instruction is fired, record it in the VRF for chaining. */
@public
Expand Down

0 comments on commit 43a260a

Please sign in to comment.