Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[rocket&t1] connect vxsat. #725

Merged
merged 1 commit into from
Aug 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rocketv/src/csr/V.scala
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class V(vlen: Int, hypervisor: Boolean) {
case "vlenb" => UInt(vlenbWidth.W)
case "vstart" => UInt(vlWidth.W)
case "vxrm" => UInt(2.W)
case "vxsat" => UInt(2.W)
case "vxsat" => Bool()
}
// https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#311-state-of-vector-extension-at-reset
def reset(content: String): Option[UInt] = content match {
Expand Down
3 changes: 3 additions & 0 deletions t1/src/Bundles.scala
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ class InstructionControl(instIndexWidth: Int, laneSize: Int) extends Bundle {
* TODO: move to `state`.
*/
val endTag: Vec[Bool] = Vec(laneSize + 1, Bool())

val vxsat: Bool = Bool()
}

class ExtendInstructionType extends Bundle {
Expand Down Expand Up @@ -628,6 +630,7 @@ class ExecutionUnitRecord(parameter: LaneParameter)(isLastSlot: Boolean) extends
val laneIndex: UInt = UInt(parameter.laneNumberBits.W)
// pipe state
val decodeResult: DecodeBundle = Decoder.bundle(parameter.decoderParam)
val instructionIndex: UInt = UInt(parameter.instructionIndexBits.W)
}

class SlotRequestToVFU(parameter: LaneParameter) extends Bundle {
Expand Down
16 changes: 16 additions & 0 deletions t1/src/Lane.scala
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
/** for each instruction in the slot, respond to top when the instruction is finished in this lane. */
@public
val instructionFinished: UInt = IO(Output(UInt(parameter.chainingSize.W)))
@public
val vxsatReport: UInt = IO(Output(UInt(parameter.chainingSize.W)))

/** V0 update in the lane should also update [[T1.v0]] */
@public
Expand Down Expand Up @@ -496,6 +498,14 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
val instructionValid: UInt = Wire(UInt(parameter.chainingSize.W))
val instructionValidNext: UInt = RegNext(instructionValid, 0.U)

val vxsatResult: UInt = RegInit(0.U(parameter.chainingSize.W))
vxsatReport := vxsatResult

// Overflow occurs
val vxsatEnq: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.chainingSize.W)))
// vxsatEnq and instructionFinished cannot happen at the same time
vxsatResult := (vxsatEnq.reduce(_ | _) | vxsatResult) & (~instructionFinished).asUInt

/** assert when an instruction will not use the mask unit */
val instructionUnrelatedMaskUnitVec: Vec[UInt] = Wire(Vec(parameter.chainingSize, UInt(parameter.chainingSize.W)))

Expand Down Expand Up @@ -744,6 +754,12 @@ class Lane(val parameter: LaneParameter) extends Module with SerializableModule[
executionUnit.vfuRequest.ready := executeEnqueueFire(index)
executionUnit.dataResponse := responseVec(index)

vxsatEnq(index) := Mux(
executionUnit.dataResponse.valid &&
(executionUnit.dataResponse.bits.clipFail ## executionUnit.dataResponse.bits.vxsat).orR,
UIntToOH(executionUnit.responseIndex(parameter.instructionIndexBits - 2, 0)),
0.U(parameter.chainingSize.W)
)
when(executionUnit.dequeue.valid)(assert(stage2.dequeue.valid))
stage3.enqueue.valid := executionUnit.dequeue.valid
executionUnit.dequeue.ready := stage3.enqueue.ready
Expand Down
2 changes: 1 addition & 1 deletion t1/src/OtherUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,5 @@ class OtherUnit(val parameter: OtherUnitParam) extends VFUModule(parameter) with
)
response.data := result
response.ffoSuccess := ffo.resp.valid && isffo
response.clipFail := DontCare
response.clipFail := roundResultOverlap || differentSign
}
11 changes: 10 additions & 1 deletion t1/src/T1.scala
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,9 @@ class T1(val parameter: T1Parameter)
*/
val instructionFinished: Vec[Vec[Bool]] = Wire(Vec(parameter.laneNumber, Vec(parameter.chainingSize, Bool())))

val vxsatReportVec: Vec[UInt] = Wire(Vec(parameter.laneNumber, UInt(parameter.chainingSize.W)))
val vxsatReport = vxsatReportVec.reduce(_ | _)

// todo: move lsu into decode as well
val maskUnitType: Bool = decodeResult(Decoder.maskUnit) && requestRegDequeue.bits.instruction(6)
val maskDestination = decodeResult(Decoder.maskDestination)
Expand Down Expand Up @@ -627,6 +630,7 @@ class T1(val parameter: T1Parameter)
* this signal is used to update the `control.endTag`.
*/
val lsuFinished: Bool = ohCheck(lsu.lastReport, control.record.instructionIndex, parameter.chainingSize)
val vxsatUpdate = ohCheck(vxsatReport, control.record.instructionIndex, parameter.chainingSize)

val dataInWritePipeCheck = ohCheck(dataInWritePipe, control.record.instructionIndex, parameter.chainingSize)
// instruction is allocated to this slot.
Expand All @@ -641,6 +645,7 @@ class T1(val parameter: T1Parameter)
control.state.wLast := false.B
control.state.sCommit := false.B
control.state.wVRFWrite := !requestReg.bits.decodeResult(Decoder.maskUnit)
control.vxsat := false.B
// two different initial states for endTag:
// for load/store instruction, use the last bit to indicate whether it is the last instruction
// for other instructions, use MSB to indicate whether it is the last instruction
Expand Down Expand Up @@ -668,6 +673,9 @@ class T1(val parameter: T1Parameter)
control.endTag.zip(instructionFinished.map(_(index)) :+ lsuFinished).foreach {
case (d, c) => d := d || c
}
when(vxsatUpdate) {
control.vxsat := true.B
}
}
// logic like mask&reduce will be put to last slot
// TODO: review later
Expand Down Expand Up @@ -1546,6 +1554,7 @@ class T1(val parameter: T1Parameter)
instructionFinished(index).zip(slots.map(_.record.instructionIndex)).foreach {
case (d, f) => d := (UIntToOH(f(parameter.instructionIndexBits - 2, 0)) & lane.instructionFinished).orR
}
vxsatReportVec(index) := lane.vxsatReport
val v0ForThisLane: Seq[UInt] = regroupV0.map(rv => cutUInt(rv, parameter.vLen / parameter.laneNumber)(index))
val v0SelectBySew = Mux1H(UIntToOH(lane.maskSelectSew)(2, 0), v0ForThisLane)
lane.maskInput := cutUInt(v0SelectBySew, parameter.datapathWidth)(lane.maskSelect)
Expand Down Expand Up @@ -1685,7 +1694,7 @@ class T1(val parameter: T1Parameter)
retire := slotCommit.asUInt.orR
io.retire.rd.bits.rdData := Mux(ffoType, ffoIndexReg.bits, dataResult.bits)
// TODO: csr retire.
io.retire.csr.bits.vxsat := DontCare
io.retire.csr.bits.vxsat := (slotCommit.asUInt & VecInit(slots.map(_.vxsat)).asUInt).orR
io.retire.csr.bits.fflag := DontCare
io.retire.csr.valid := false.B
io.retire.mem.valid := (slotCommit.asUInt & VecInit(slots.map(_.record.isLoadStore)).asUInt).orR
Expand Down
6 changes: 6 additions & 0 deletions t1/src/laneStage/Distributor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Distributor[T <: SlotRequestToVFU, B <: VFUResponseToSlot](enqueue: T, deq
val requestReg: ValidIO[SlotRequestToVFU] = RegInit(0.U.asTypeOf(Valid(enqueue)))
val sendRequestValid: Bool = RegInit(false.B)
val ffoSuccess: Bool = RegInit(false.B)
val vxsatResult = RegInit(false.B)
val responseData: UInt = RegInit(0.U(enqueue.src.head.getWidth.W))
val executeIndex = RegInit(0.U(2.W))

Expand Down Expand Up @@ -163,13 +164,18 @@ class Distributor[T <: SlotRequestToVFU, B <: VFUResponseToSlot](enqueue: T, deq
when(responseFromVfu.fire || requestFromSlot.fire) {
ffoSuccess := updateFFO && !requestFromSlot.fire
}
val updateVxsat = (responseFromVfu.bits.vxsat ## responseFromVfu.bits.clipFail).orR || vxsatResult
when(responseFromVfu.fire || requestFromSlot.fire) {
vxsatResult := updateVxsat && !requestFromSlot.fire
}

requestFromSlot.ready := !requestReg.valid || isLastResponse

responseWire.valid := isLastResponse && requestReg.valid
responseWire.bits.data := resultUpdate
responseWire.bits.ffoSuccess := updateFFO
responseWire.bits.tag := requestReg.bits.tag
responseWire.bits.vxsat := updateVxsat

val pipeResponse: ValidIO[VFUResponseToSlot] = RegNext(responseWire, 0.U.asTypeOf(responseToSlot))
responseToSlot <> pipeResponse
Expand Down
7 changes: 7 additions & 0 deletions t1/src/laneStage/LaneExecutionBridge.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class LaneExecuteRequest(parameter: LaneParameter, isLastSlot: Boolean) extends
val maskType: Bool = Bool()
// Newly added in LaneExecutionBridge
val laneIndex: UInt = UInt(parameter.laneNumberBits.W)
val instructionIndex: UInt = UInt(parameter.instructionIndexBits.W)
}

class LaneExecuteResponse(parameter: LaneParameter, isLastSlot: Boolean) extends Bundle {
Expand All @@ -45,6 +46,7 @@ class ExecutionBridgeRecordQueue(parameter: LaneParameter, isLastSlot: Boolean)
// pipe state
val decodeResult: DecodeBundle = Decoder.bundle(parameter.decoderParam)
val vSew1H: UInt = UInt(3.W)
val instructionIndex: UInt = UInt(parameter.instructionIndexBits.W)
}

@instantiable
Expand All @@ -71,6 +73,8 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd
val executeDecode: DecodeBundle = IO(Output(Decoder.bundle(parameter.decoderParam)))
@public
val responseDecode: DecodeBundle = IO(Output(Decoder.bundle(parameter.decoderParam)))
@public
val responseIndex: UInt = IO(Output(UInt(parameter.instructionIndexBits.W)))

val executionRecord: ExecutionUnitRecord = RegInit(0.U.asTypeOf(new ExecutionUnitRecord(parameter)(isLastSlot)))
val executionRecordValid = RegInit(false.B)
Expand Down Expand Up @@ -152,6 +156,7 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd
executionRecord.csr := enqueue.bits.csr
executionRecord.maskType := enqueue.bits.maskType
executionRecord.laneIndex := enqueue.bits.laneIndex
executionRecord.instructionIndex := enqueue.bits.instructionIndex
}

/** collapse the dual SEW size operand for cross read.
Expand Down Expand Up @@ -327,6 +332,7 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd
}
recordQueue.io.enq.bits.decodeResult := executionRecord.decodeResult
recordQueue.io.enq.bits.vSew1H := executionRecord.vSew1H
recordQueue.io.enq.bits.instructionIndex := executionRecord.instructionIndex
//--- vfu <-> write queue start ---

/** same as [[doubleExecutionInRecord]]
Expand Down Expand Up @@ -540,6 +546,7 @@ class LaneExecutionBridge(parameter: LaneParameter, isLastSlot: Boolean, slotInd
queue.io.enq.bits.fpReduceValid.foreach(_ := !waitFirstValidFire.get)
recordQueue.io.deq.ready := dataResponse.valid || (recordNotExecute && queue.io.enq.ready)
responseDecode := recordQueue.io.deq.bits.decodeResult
responseIndex := recordQueue.io.deq.bits.instructionIndex
queue.io.enq.valid :=
(recordQueue.io.deq.valid &&
((dataResponse.valid && reduceReady &&
Expand Down