From 4ccf80abdfbf69dd4e8a641bb213378396b62df1 Mon Sep 17 00:00:00 2001 From: Simon Dardis Date: Thu, 7 Apr 2022 00:31:33 +0100 Subject: [PATCH] [MIPS] Initial support for MIPS-I load delay slots LLVM so far has only supported the MIPS-II and above architectures. MIPS-II is pretty close to MIPS-I, the major difference being that "load" instructions always take one extra instruction slot to propagate to registers. This patch adds support for MIPS-I by adding hazard handling for load delay slots, alongside MIPSR6 forbidden slots and FPU slots, inserting a NOP instruction between a load and any instruction immediately following that reads the load's destination register. I also included a simple regression test. Since no existing tests target MIPS-I, those all still pass. Issue ref: https://github.com/simias/psx-sdk-rs/issues/1 I also tested by building a simple demo app with Clang and running it in an emulator. Patch by: @impiaaa Differential Revision: https://reviews.llvm.org/D122427 --- llvm/lib/Target/Mips/MipsBranchExpansion.cpp | 29 +++++++++---- llvm/lib/Target/Mips/MipsInstrInfo.cpp | 28 +++++++++++++ llvm/lib/Target/Mips/MipsInstrInfo.h | 7 ++++ llvm/lib/Target/Mips/MipsSubtarget.cpp | 13 ++++-- llvm/lib/Target/Mips/MipsSubtarget.h | 3 ++ llvm/test/CodeGen/Mips/cpus.ll | 5 ++- llvm/test/CodeGen/Mips/mips1-load-delay.ll | 44 ++++++++++++++++++++ 7 files changed, 116 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/mips1-load-delay.ll diff --git a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp index c3ee656e58e2..295226317034 100644 --- a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp +++ b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp @@ -36,7 +36,8 @@ /// /// Regarding compact branch hazard prevention: /// -/// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below. 
+/// Hazards handled: forbidden slots for MIPSR6, FPU slots for MIPS3 and below, +/// load delay slots for MIPS1. /// /// A forbidden slot hazard occurs when a compact branch instruction is executed /// and the adjacent instruction in memory is a control transfer instruction @@ -164,6 +165,7 @@ class MipsBranchExpansion : public MachineFunctionPass { bool handleSlot(Pred Predicate, Safe SafeInSlot); bool handleForbiddenSlot(); bool handleFPUDelaySlot(); + bool handleLoadDelaySlot(); bool handlePossibleLongBranch(); const MipsSubtarget *STI; @@ -762,7 +764,6 @@ bool MipsBranchExpansion::handleSlot(Pred Predicate, Safe SafeInSlot) { } if (LastInstInFunction || !SafeInSlot(*IInSlot, *I)) { - MachineBasicBlock::instr_iterator Iit = I->getIterator(); if (std::next(Iit) == FI->end() || std::next(Iit)->getOpcode() != Mips::NOP) { @@ -801,6 +802,18 @@ bool MipsBranchExpansion::handleFPUDelaySlot() { }); } +bool MipsBranchExpansion::handleLoadDelaySlot() { + // Load delay slot hazards are only for MIPS1. + if (STI->hasMips2()) + return false; + + return handleSlot( + [this](auto &I) -> bool { return TII->HasLoadDelaySlot(I); }, + [this](auto &IInSlot, auto &I) -> bool { + return TII->SafeInLoadDelaySlot(IInSlot, I); + }); +} + bool MipsBranchExpansion::handlePossibleLongBranch() { if (STI->inMips16Mode() || !STI->enableLongBranchPass()) return false; @@ -877,19 +890,21 @@ bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) { MFp = &MF; ForceLongBranchFirstPass = ForceLongBranch; - // Run these two at least once + // Run these at least once. 
bool longBranchChanged = handlePossibleLongBranch(); bool forbiddenSlotChanged = handleForbiddenSlot(); bool fpuDelaySlotChanged = handleFPUDelaySlot(); + bool loadDelaySlotChanged = handleLoadDelaySlot(); - bool Changed = - longBranchChanged || forbiddenSlotChanged || fpuDelaySlotChanged; + bool Changed = longBranchChanged || forbiddenSlotChanged || + fpuDelaySlotChanged || loadDelaySlotChanged; - // Then run them alternatively while there are changes + // Then run them alternately while there are changes. while (forbiddenSlotChanged) { longBranchChanged = handlePossibleLongBranch(); fpuDelaySlotChanged = handleFPUDelaySlot(); - if (!longBranchChanged && !fpuDelaySlotChanged) + loadDelaySlotChanged = handleLoadDelaySlot(); + if (!longBranchChanged && !fpuDelaySlotChanged && !loadDelaySlotChanged) break; forbiddenSlotChanged = handleForbiddenSlot(); } diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp index 8d94f29c1cf9..8ecb1e4a2394 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp +++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp @@ -597,6 +597,18 @@ bool MipsInstrInfo::SafeInFPUDelaySlot(const MachineInstr &MIInSlot, return true; } +/// Predicate for distinguishing instructions that are hazardous in a load delay +/// slot. Consider inline assembly as unsafe as well. +bool MipsInstrInfo::SafeInLoadDelaySlot(const MachineInstr &MIInSlot, + const MachineInstr &LoadMI) const { + if (MIInSlot.isInlineAsm()) + return false; + + return !llvm::any_of(LoadMI.defs(), [&](const MachineOperand &Op) { + return Op.isReg() && MIInSlot.readsRegister(Op.getReg()); + }); +} + /// Predicate for distingushing instructions that have forbidden slots. bool MipsInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const { return (MI.getDesc().TSFlags & MipsII::HasForbiddenSlot) != 0; @@ -621,6 +633,22 @@ bool MipsInstrInfo::HasFPUDelaySlot(const MachineInstr &MI) const { } } +/// Predicate for distinguishing instructions that have load delay slots. 
+bool MipsInstrInfo::HasLoadDelaySlot(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + case Mips::LB: + case Mips::LBu: + case Mips::LH: + case Mips::LHu: + case Mips::LW: + case Mips::LWR: + case Mips::LWL: + return true; + default: + return false; + } +} + /// Return the number of bytes of code the specified instruction may be. unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { switch (MI.getOpcode()) { diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h index 46c1b73d512f..54281faa71e8 100644 --- a/llvm/lib/Target/Mips/MipsInstrInfo.h +++ b/llvm/lib/Target/Mips/MipsInstrInfo.h @@ -96,12 +96,19 @@ class MipsInstrInfo : public MipsGenInstrInfo { bool SafeInFPUDelaySlot(const MachineInstr &MIInSlot, const MachineInstr &FPUMI) const; + /// Predicate to determine if an instruction can go in a load delay slot. + bool SafeInLoadDelaySlot(const MachineInstr &MIInSlot, + const MachineInstr &LoadMI) const; + /// Predicate to determine if an instruction has a forbidden slot. bool HasForbiddenSlot(const MachineInstr &MI) const; /// Predicate to determine if an instruction has an FPU delay slot. bool HasFPUDelaySlot(const MachineInstr &MI) const; + /// Predicate to determine if an instruction has a load delay slot. 
+ bool HasLoadDelaySlot(const MachineInstr &MI) const; + /// Insert nop instruction when hazard condition is found void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; diff --git a/llvm/lib/Target/Mips/MipsSubtarget.cpp b/llvm/lib/Target/Mips/MipsSubtarget.cpp index c285385a19dd..5c6127550bac 100644 --- a/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -64,6 +64,7 @@ bool MipsSubtarget::MSAWarningPrinted = false; bool MipsSubtarget::VirtWarningPrinted = false; bool MipsSubtarget::CRCWarningPrinted = false; bool MipsSubtarget::GINVWarningPrinted = false; +bool MipsSubtarget::MIPS1WarningPrinted = false; void MipsSubtarget::anchor() {} @@ -91,10 +92,14 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, if (MipsArchVersion == MipsDefault) MipsArchVersion = Mips32; - // Don't even attempt to generate code for MIPS-I and MIPS-V. They have not - // been tested and currently exist for the integrated assembler only. - if (MipsArchVersion == Mips1) - report_fatal_error("Code generation for MIPS-I is not implemented", false); + // MIPS-I has not been tested. + if (MipsArchVersion == Mips1 && !MIPS1WarningPrinted) { + errs() << "warning: MIPS-I support is experimental\n"; + MIPS1WarningPrinted = true; + } + + // Don't even attempt to generate code for MIPS-V. It has not + // been tested and currently exists for the integrated assembler only. if (MipsArchVersion == Mips5) report_fatal_error("Code generation for MIPS-V is not implemented", false); diff --git a/llvm/lib/Target/Mips/MipsSubtarget.h b/llvm/lib/Target/Mips/MipsSubtarget.h index 84d3a6866219..ec8ca64c8ce8 100644 --- a/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/llvm/lib/Target/Mips/MipsSubtarget.h @@ -59,6 +59,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // Used to avoid printing ginv warnings multiple times. static bool GINVWarningPrinted; + // Used to avoid printing Mips1 warnings multiple times. 
+ static bool MIPS1WarningPrinted; + // Used to avoid printing virt warnings multiple times. static bool VirtWarningPrinted; diff --git a/llvm/test/CodeGen/Mips/cpus.ll b/llvm/test/CodeGen/Mips/cpus.ll index 5f9740c2ce86..995b0fd82010 100644 --- a/llvm/test/CodeGen/Mips/cpus.ll +++ b/llvm/test/CodeGen/Mips/cpus.ll @@ -4,6 +4,9 @@ ; RUN: | llvm-readelf -A - | FileCheck %s --check-prefix=GENERIC ; GENERIC: ISA: MIPS32 +; RUN: llc -mtriple=mips -mcpu=mips1 -filetype=obj < %s \ +; RUN: | llvm-readelf -A - | FileCheck %s --check-prefix=MIPS1 +; MIPS1: ISA: MIPS1 ; RUN: llc -mtriple=mips -mcpu=mips2 -filetype=obj < %s \ ; RUN: | llvm-readelf -A - | FileCheck %s --check-prefix=MIPS2 ; MIPS2: ISA: MIPS2 @@ -57,8 +60,6 @@ ; Check that we reject CPUs that are not implemented. -; RUN: not --crash llc < %s -o /dev/null -mtriple=mips -mcpu=mips1 2>&1 \ -; RUN: | FileCheck %s --check-prefix=ERROR ; RUN: not --crash llc < %s -o /dev/null -mtriple=mips64 -mcpu=mips5 2>&1 \ ; RUN: | FileCheck %s --check-prefix=ERROR diff --git a/llvm/test/CodeGen/Mips/mips1-load-delay.ll b/llvm/test/CodeGen/Mips/mips1-load-delay.ll new file mode 100644 index 000000000000..516ecf70e4de --- /dev/null +++ b/llvm/test/CodeGen/Mips/mips1-load-delay.ll @@ -0,0 +1,44 @@ +; RUN: llc < %s -mtriple=mips -mcpu=mips1 | FileCheck %s -check-prefixes=ALL,MIPS1 +; RUN: llc < %s -mtriple=mips -mcpu=mips2 | FileCheck %s -check-prefixes=ALL,MIPS2 +; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 | FileCheck %s -check-prefixes=ALL,MIPS32 +target datalayout = "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64" +target triple = "mipsel-unknown-unknown-elf" + +; Function Attrs: noinline nounwind optnone +define dso_local i32 @add_two_pointers(i32* %a, i32* %b) #0 { +entry: +; ALL-LABEL: add_two_pointers: + %a.addr = alloca i32*, align 4 + %b.addr = alloca i32*, align 4 + store i32* %a, i32** %a.addr, align 4 + store i32* %b, i32** %b.addr, align 4 + %0 = load i32*, i32** %a.addr, align 4 + %1 = load i32, i32* %0, align 4 + ; 
ALL: lw $1, 4($fp) + ; MIPS1: nop + ; MIPS2-NOT: nop + ; MIPS32-NOT: nop + ; ALL: lw $1, 0($1) + %2 = load i32*, i32** %b.addr, align 4 + %3 = load i32, i32* %2, align 4 + ; ALL: lw $2, 0($fp) + ; MIPS1: nop + ; MIPS2-NOT: nop + ; MIPS32-NOT: nop + ; ALL: lw $2, 0($2) + %add = add nsw i32 %1, %3 + ret i32 %add + ; ALL: lw $ra, 12($sp) + ; MIPS1: nop + ; MIPS2-NOT: nop + ; MIPS32-NOT: nop + ; ALL: jr $ra +} + +attributes #0 = { noinline nounwind optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-noabicalls" } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +