Skip to content

Commit

Permalink
[AIE2] Use OR to mimic MOV when copying GPR to GPR
Browse files Browse the repository at this point in the history
  • Loading branch information
krishnamtibrewala committed Nov 11, 2024
1 parent 1b74ec6 commit 6b76c28
Show file tree
Hide file tree
Showing 29 changed files with 362 additions and 268 deletions.
10 changes: 10 additions & 0 deletions llvm/lib/Target/AIE/AIE2GenInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,16 @@ class AIE2_alu_r_rr_inst_alu < bits<4> op, dag outs, dag ins, string opcodestr =
let alu = {mRx0,mRx,mRy,op,0b1};
}

// Encoding used to express a register-to-register move as an ALU operation:
// MOV $rx, $ry is emitted as OR $rx, $ry, $ry.
// Only one source operand (mRx0) exists; it is placed in both source fields
// of the `alu` encoding.
class AIE2_alu_r_rr_inst_alu_mov_gpr < bits<4> op, dag outs, dag ins, string opcodestr = "", string argstr = "">:
AIE2_inst_alu_instr32 <outs, ins, opcodestr, argstr> {
// Destination register field.
bits<5> mRx;
// NOTE(review): eBinArith is not referenced by the `alu` encoding below -
// confirm whether this field is still needed.
bits<4> eBinArith;
// Single source register field.
bits<5> mRx0;

// mRx0 appears twice so that both sources of the operation are the same
// register.
let alu = {mRx0,mRx,mRx0,op,0b1};
}

class AIE2_lng_cg_lng < dag outs, dag ins, string opcodestr = "", string argstr = "">:
AIE2_inst_lng_instr48 <outs, ins, opcodestr, argstr> {
bits<32> i;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/AIE2GenInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ let Itinerary = II_NE in {
}
let Itinerary = II_OR in {
def OR : AIE2_alu_r_rr_inst_alu<0b0101, (outs eR:$mRx), (ins eR:$mRx0, eR:$mRy), "or", "$mRx, $mRx0, $mRy">;
// Move between eR registers encoded as an OR of the source with itself:
// same opcode (0b0101) and mnemonic as OR, but with a single source operand
// that is printed twice in the assembly string.
let isCodeGenOnly = 1 in
def MOV_OR : AIE2_alu_r_rr_inst_alu_mov_gpr<0b0101, (outs eR:$mRx), (ins eR:$mRx0), "or", "$mRx, $mRx0, $mRx0">;
}
let Itinerary = II_SBC in {
let Defs = [srCarry], Uses = [srCarry] in
Expand Down
14 changes: 12 additions & 2 deletions llvm/lib/Target/AIE/AIE2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ void AIE2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// TODO : add support for 128-bit mask register
if (AIE2::mMvSclSrcRegClass.contains(SrcReg) &&
AIE2::mMvSclDstRegClass.contains(DstReg)) {
BuildMI(MBB, MBBI, DL, get(AIE2::MOV_mv_scl), DstReg)
const unsigned MOVSclOpcode = getScalarMovOpcode(DstReg, SrcReg);
BuildMI(MBB, MBBI, DL, get(MOVSclOpcode), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else if ((AIE2::eLRegClass.contains(SrcReg)) &&
(AIE2::eLRegClass.contains(DstReg))) {
Expand Down Expand Up @@ -732,6 +733,14 @@ unsigned AIE2InstrInfo::getConstantMovOpcode(MachineRegisterInfo &MRI,
dbgs() << "DstRegClass ID: " << DstRegClass->getID() << "\n";
}

/// Pick the opcode used to copy \p SrcReg into \p DstReg.
///
/// When both registers belong to the eR register class, the MOV_SCL_pseudo
/// opcode is returned; every other combination uses MOV_mv_scl.
unsigned AIE2InstrInfo::getScalarMovOpcode(Register DstReg,
                                           Register SrcReg) const {
  // Source is tested first, then destination, both against eR.
  const bool IsGPRToGPRCopy = AIE2::eRRegClass.contains(SrcReg) &&
                              AIE2::eRRegClass.contains(DstReg);
  if (IsGPRToGPRCopy)
    return AIE2::MOV_SCL_pseudo;

  return AIE2::MOV_mv_scl;
}

unsigned AIE2InstrInfo::getCycleSeparatorOpcode() const {
return AIE2::CYCLE_SEPARATOR;
}
Expand Down Expand Up @@ -829,7 +838,8 @@ bool AIE2InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AIE2::PseudoMove: {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
BuildMI(MBB, MI, DL, get(AIE2::MOV_mv_scl), Dst)
const unsigned MOVSclOpcode = getScalarMovOpcode(Dst, Src);
BuildMI(MBB, MI, DL, get(MOVSclOpcode), Dst)
.addReg(Src, getKillRegState(true));
MI.eraseFromParent();
return true;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/AIE2InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class AIE2InstrInfo : public AIE2GenInstrInfo {
unsigned getPseudoMoveOpcode() const override;
unsigned getConstantMovOpcode(MachineRegisterInfo &MRI, unsigned int Reg,
APInt &Val) const override;
unsigned getScalarMovOpcode(Register DstReg, Register SrcReg) const override;
unsigned getGenericAddVectorEltOpcode() const override;
unsigned getGenericInsertVectorEltOpcode() const override;
unsigned getGenericExtractVectorEltOpcode(bool SignExt) const override;
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AIE/AIE2MultiSlotPseudoInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ let mayLoad = false, mayStore = false, hasSideEffects = false, Itinerary = II_PA
}
}

// Multi-slot pseudo for a scalar move between eR registers. It can be
// materialized either as MOV_mv_scl or as MOV_OR.
// We use OR to mimic MOV behavior, and not ADD with 0, because ADD uses the
// $srCarry register.
let Itinerary = II_MOV_SCL_RS, isMoveReg = 1, hasSideEffects = false, mayLoad = false, mayStore = false in {
def MOV_SCL_pseudo : MultiSlot_Pseudo<(outs eR:$mRx), (ins eR:$mRx0),
"mov_scl_pseudo", "$mRx, $mRx0",
[MOV_mv_scl, MOV_OR]>;
}

// Pseudo MOV
let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, Itinerary = II_MOV,
hasSideEffects = false, mayLoad = false, mayStore = false in {
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
unsigned int Reg, APInt &Val) const {
llvm_unreachable("Target didn't implement getConstantMovOpcode");
}
/// Return the opcode to use for a scalar move from \p SrcReg to \p DstReg.
/// The choice depends on the registers involved; the result may be a
/// multi-slot pseudo opcode.
/// The base implementation is unreachable: targets are expected to override
/// it.
virtual unsigned getScalarMovOpcode(Register DstReg, Register SrcReg) const {
llvm_unreachable("Target didn't implement getScalarMovOpcode");
}
/// Returns the opcode for CYCLE_SEPARATOR meta instruction.
/// Used for debugging purposes
virtual unsigned getCycleSeparatorOpcode() const {
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AIE/aie2/addlog.ll
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ define dso_local ptr @_Z11test_selectbPvS_(i1 noundef zeroext %sel, ptr readnone
; CHECK-LABEL: _Z11test_selectbPvS_:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm
; CHECK-NEXT: mov r27, r0 // Delay Slot 5
; CHECK-NEXT: mov r0, p1 // Delay Slot 4
; CHECK-NEXT: nopa ; ret lr ; nopm
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: or r27, r0, r0; mov r0, p1 // Delay Slot 4
; CHECK-NEXT: mov r1, p2 // Delay Slot 3
; CHECK-NEXT: sel.nez r0, r0, r1, r27 // Delay Slot 2
; CHECK-NEXT: mov p0, r0 // Delay Slot 1
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AIE/aie2/bfloat16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ define dso_local noundef <32 x bfloat> @bneg_v32bf16(<32 x bfloat> noundef %a)
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mov r0, r16 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: vbneg_ltz.s16 x0, r16, x2 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: or r0, r16, r16 // Delay Slot 2
; CHECK-NEXT: mov r16, r0 // Delay Slot 1
entry:
%0 = bitcast <32 x bfloat> %a to <32 x i16>
Expand Down Expand Up @@ -114,9 +114,9 @@ define dso_local noundef <32 x bfloat> @max_lt_v32bf16(<32 x bfloat> noundef %a,
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops
; CHECK-NEXT: mov r0, r16 // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: vmax_lt.bf16 x0, r16, x2, x4 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: or r0, r16, r16 // Delay Slot 3
; CHECK-NEXT: st r16, [p0, #0] // Delay Slot 2
; CHECK-NEXT: mov r16, r0 // Delay Slot 1
entry:
Expand All @@ -134,9 +134,9 @@ define dso_local noundef <32 x bfloat> @max_v32bf16D(<32 x bfloat> noundef %a, <
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mov r0, r16 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: vmax_lt.bf16 x0, r16, x2, x4 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: or r0, r16, r16 // Delay Slot 2
; CHECK-NEXT: mov r16, r0 // Delay Slot 1
entry:
%0 = tail call { <32 x bfloat>, i32 } @llvm.aie2.vmax.ltbf16(<32 x bfloat> %a, <32 x bfloat> %b)
Expand All @@ -149,9 +149,9 @@ define dso_local noundef <32 x bfloat> @min_ge_v32bf16(<32 x bfloat> noundef %a,
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops
; CHECK-NEXT: mov r0, r16 // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: vmin_ge.bf16 x0, r16, x2, x4 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: or r0, r16, r16 // Delay Slot 3
; CHECK-NEXT: st r16, [p0, #0] // Delay Slot 2
; CHECK-NEXT: mov r16, r0 // Delay Slot 1
entry:
Expand All @@ -169,9 +169,9 @@ define dso_local noundef <32 x bfloat> @min_v32bf16(<32 x bfloat> noundef %a, <3
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mov r0, r16 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: vmin_ge.bf16 x0, r16, x2, x4 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: or r0, r16, r16 // Delay Slot 2
; CHECK-NEXT: mov r16, r0 // Delay Slot 1
entry:
%0 = tail call { <32 x bfloat>, i32 } @llvm.aie2.vmin.gebf16(<32 x bfloat> %a, <32 x bfloat> %b)
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AIE/aie2/brcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,10 @@ define i32 @br_diamond_complex_end(i32 %a, i32 %b, i32 %v, i32* nocapture writ
; CHECK-NEXT: nopa ; nopb ; geu r0, r2, r1
; CHECK-NEXT: jnz r0, #.LBB3_2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: paddb [sp], #32 // Delay Slot 4
; CHECK-NEXT: st r16, [sp, #-32] // 4-byte Folded Spill Delay Slot 3
; CHECK-NEXT: mov r16, r3 // Delay Slot 2
; CHECK-NEXT: st lr, [sp, #-28] // 4-byte Folded Spill Delay Slot 1
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: paddb [sp], #32 // Delay Slot 3
; CHECK-NEXT: st r16, [sp, #-32] // 4-byte Folded Spill Delay Slot 2
; CHECK-NEXT: st lr, [sp, #-28]; or r16, r3, r3 // 4-byte Folded Spill Delay Slot 1
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: nopb ; nopa ; nops ; j #.LBB3_3; nopv
; CHECK-NEXT: nopa ; nopx // Delay Slot 5
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AIE/aie2/call_i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ define i64 @test_i64(<2 x i32> %x) {
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
; CHECK-NEXT: nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: mov r0, r16 // Delay Slot 2
; CHECK-NEXT: mov r1, r17 // Delay Slot 1
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: or r0, r16, r16; mov r1, r17 // Delay Slot 1
entry:
%cast = bitcast <2 x i32> %x to i64
ret i64 %cast
Expand All @@ -29,11 +29,11 @@ define <2 x i32> @test_v2i32(i64 %x) {
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
; CHECK-NEXT: nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: mov r16, r0 // Delay Slot 2
; CHECK-NEXT: mov r17, r1 // Delay Slot 1
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: or r16, r0, r0; mov r17, r1 // Delay Slot 1
entry:
%cast = bitcast i64 %x to <2 x i32>
ret <2 x i32> %cast
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AIE/aie2/end-to-end/TanhTemplated-swp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,7 @@ define dso_local void @TanhTemplated(ptr noalias %ifm, ptr noalias %ofm, ptr non
; CHECK-LABEL: TanhTemplated:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %for.body.lr.ph
; CHECK-NEXT: nopa ; mov r8, r16
; CHECK-NEXT: movxm r3, #16512
; CHECK-NEXT: nop ; movxm r3, #16512
; CHECK-NEXT: movxm r4, #-16256
; CHECK-NEXT: movxm r5, #32767
; CHECK-NEXT: movxm r0, #16256
Expand All @@ -86,7 +85,7 @@ define dso_local void @TanhTemplated(ptr noalias %ifm, ptr noalias %ofm, ptr non
; CHECK-NEXT: vconv.bf16.fp32 wl3, bmh2; vbcst.16 x4, r7
; CHECK-NEXT: vmov wh6, wl2
; CHECK-NEXT: vmin_ge.bf16 x3, r16, x3, x1
; CHECK-NEXT: vmax_lt.bf16 x3, r16, x3, x10
; CHECK-NEXT: or r8, r16, r16; vmax_lt.bf16 x3, r16, x3, x10
; CHECK-NEXT: vconv.bf16.fp32 wl5, bmh3; vband x7, x8, x3
; CHECK-NEXT: vmov wh7, wl2
; CHECK-NEXT: vmin_ge.bf16 x5, r16, x5, x1
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/loop-with-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,20 @@ define dso_local void @_Z16addToSymbolTablePKci(ptr nocapture readonly %name, i3
; CHECK-LABEL: _Z16addToSymbolTablePKci:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: paddb [sp], #32; nopxm
; CHECK-NEXT: nopa ; paddb [sp], #32; nopx
; CHECK-NEXT: st p7, [sp, #-32]; movxm p7, #symbolCount // 4-byte Folded Spill
; CHECK-NEXT: lda r17, [p7, #0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: mov p2, p0
; CHECK-NEXT: mova r1, #56; st r16, [sp, #-20] // 4-byte Folded Spill
; CHECK-NEXT: nop
; CHECK-NEXT: mova r1, #56; mov p2, p0
; CHECK-NEXT: st r17, [sp, #-24]; jl #strcpy // 4-byte Folded Spill
; CHECK-NEXT: st p6, [sp, #-28]; mul r1, r17, r1 // 4-byte Folded Spill Delay Slot 5
; CHECK-NEXT: st lr, [sp, #-16]; movxm p6, #symbolTable // 4-byte Folded Spill Delay Slot 4
; CHECK-NEXT: mov m0, r1 // Delay Slot 3
; CHECK-NEXT: paddb [p6], m0; mov r16, r0 // Delay Slot 2
; CHECK-NEXT: mov p1, p6 // Delay Slot 1
; CHECK-NEXT: paddb [p6], m0; st r16, [sp, #-20] // 4-byte Folded Spill Delay Slot 2
; CHECK-NEXT: or r16, r0, r0; mov p1, p6 // Delay Slot 1
; CHECK-NEXT: lda lr, [sp, #-16]; nopx // 4-byte Folded Reload
; CHECK-NEXT: nop
; CHECK-NEXT: nop
Expand Down
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/postrapseudos/pseudoMove.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -verify-machineinstrs --march=aie2 -run-pass=postrapseudos %s -o - | FileCheck %s

---
name: pseudoMov_GPR
alignment: 16
body: |
bb.0 (align 16):
; CHECK-LABEL: name: pseudoMov_GPR
; CHECK: $r1 = MOV_SCL_pseudo killed $r0
$r1 = PseudoMove $r0
...

---
name: pseudoMov_non_GPR
alignment: 16
body: |
bb.0 (align 16):
; CHECK-LABEL: name: pseudoMov_non_GPR
; CHECK: $r1 = MOV_mv_scl killed $p0
; CHECK-NEXT: $p0 = MOV_mv_scl killed $r1
; CHECK-NEXT: $p1 = MOV_mv_scl killed $p0
; CHECK-NEXT: $r1 = MOV_mv_scl killed $s0
; CHECK-NEXT: $s0 = MOV_mv_scl killed $r1
; CHECK-NEXT: $s1 = MOV_mv_scl killed $s0
$r1 = PseudoMove $p0
$p0 = PseudoMove $r1
$p1 = PseudoMove $p0
$r1 = PseudoMove $s0
$s0 = PseudoMove $r1
$s1 = PseudoMove $s0
...

---
name: COPY_GPR
alignment: 16
body: |
bb.0 (align 16):
; CHECK-LABEL: name: COPY_GPR
; CHECK: $r1 = MOV_SCL_pseudo $r0
$r1 = COPY $r0
...

---
name: COPY_non_GPR
alignment: 16
body: |
bb.0 (align 16):
; CHECK-LABEL: name: COPY_non_GPR
; CHECK: $r1 = MOV_mv_scl $p0
; CHECK-NEXT: $p0 = MOV_mv_scl $r1
; CHECK-NEXT: $p1 = MOV_mv_scl $p0
; CHECK-NEXT: $r1 = MOV_mv_scl $s0
; CHECK-NEXT: $s0 = MOV_mv_scl $r1
; CHECK-NEXT: $s1 = MOV_mv_scl $s0
$r1 = COPY $p0
$p0 = COPY $r1
$p1 = COPY $p0
$r1 = COPY $s0
$s0 = COPY $r1
$s1 = COPY $s0
...
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/schedule/mov.mir
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,32 @@ body: |
$r2 = MOV_RLC_imm10_pseudo 1
$r3 = MOV_RLC_imm10_pseudo 2
...

---
name: multi_slot_mov_or_mov
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: multi_slot_mov_or_mov
; CHECK: BUNDLE implicit-def $r2, implicit-def $r7, implicit killed $r1, implicit killed $r6 {
; CHECK-NEXT: $r2 = MOV_OR killed $r1
; CHECK-NEXT: $r7 = MOV_mv_scl killed $r6
; CHECK-NEXT: }
$r7 = MOV_mv_scl $r6
$r2 = MOV_SCL_pseudo $r1
...

# Test for reverse order of multi-slot MOV and MOV_OR_GPR
# Since the multi-slot selection is greedy, i.e. the first available slot is
# selected, MOV_SCL_pseudo is materialized to the M slot.
---
name: multi_slot_mov_or_mov_reverse
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: multi_slot_mov_or_mov_reverse
; CHECK: $r2 = MOV_mv_scl killed $r1
; CHECK-NEXT: $r7 = MOV_mv_scl killed $r6
$r2 = MOV_SCL_pseudo $r1
$r7 = MOV_mv_scl $r6
...
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AIE/aie2/streams.ll
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,7 @@ define void @put_ms_cph_doTlastReg(i32 inreg %tlast, i32 inreg %addr, i32 inreg
; CHECK-LABEL: put_ms_cph_doTlastReg:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r28, r0
; CHECK-NEXT: mov m0, r1
; CHECK-NEXT: nopb ; nopa ; nops ; or r28, r0, r0; mov m0, r1; nopv
; CHECK-NEXT: mov.cph ms, m0, #0, #3, r2, r28; ret lr
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
Expand Down Expand Up @@ -174,8 +173,7 @@ define i32 @put_ms_cph_nb_doTlastReg(i32 inreg %tlast, i32 inreg %addr, i32 inre
; CHECK-LABEL: put_ms_cph_nb_doTlastReg:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopx ; mov r28, r1
; CHECK-NEXT: mov m0, r2
; CHECK-NEXT: nopa ; nopb ; or r28, r1, r1; mov m0, r2; nops
; CHECK-NEXT: mov.cph.nb ms, m0, #3, #3, r3, r28; ret lr
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
Expand Down
Loading

0 comments on commit 6b76c28

Please sign in to comment.