Skip to content

Commit 0ab6a15

Browse files
committed
[X86] Add support for using fast short rep mov for memcpy lowering.
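The new lowering is disabled by default; enable it with the hidden -x86-use-fsrm-for-memcpy option (it only takes effect on subtargets that have FSRM). Differential Revision: https://reviews.llvm.org/D86883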
1 parent 72e2fbd commit 0ab6a15

File tree

3 files changed

+40
-1
lines changed

3 files changed

+40
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -3109,7 +3109,7 @@ argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
31093109
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
31103110
SDValue Chain, ISD::ArgFlagsTy Flags,
31113111
SelectionDAG &DAG, const SDLoc &dl) {
3112-
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
3112+
SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
31133113

31143114
return DAG.getMemcpy(
31153115
Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),

llvm/lib/Target/X86/X86SelectionDAGInfo.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ using namespace llvm;
2424

2525
#define DEBUG_TYPE "x86-selectiondag-info"
2626

27+
static cl::opt<bool>
28+
UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
29+
cl::desc("Use fast short rep mov in memcpy lowering"));
30+
2731
bool X86SelectionDAGInfo::isBaseRegConflictPossible(
2832
SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
2933
// We cannot use TRI->hasBasePointer() until *after* we select all basic
@@ -306,6 +310,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
306310
const X86Subtarget &Subtarget =
307311
DAG.getMachineFunction().getSubtarget<X86Subtarget>();
308312

313+
// If enabled and available, use fast short rep mov.
314+
if (UseFSRMForMemcpy && Subtarget.hasFSRM())
315+
return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
316+
309317
/// Handle constant sizes.
310318
if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
311319
return emitConstantSizeRepmov(
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=-fsrm < %s -o - | FileCheck %s --check-prefix=NOFSRM
3+
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=+fsrm < %s -o - | FileCheck %s --check-prefix=FSRM
4+
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=haswell < %s | FileCheck %s --check-prefix=NOFSRM
5+
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-client < %s | FileCheck %s --check-prefix=FSRM
6+
; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-server < %s | FileCheck %s --check-prefix=FSRM
7+
8+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
9+
10+
define void @test1(i8* %a, i8* %b, i64 %s) nounwind {
11+
; NOFSRM-LABEL: test1
12+
; NOFSRM: # %bb.0:
13+
; NOFSRM: jmp memcpy
14+
;
15+
; FSRM-LABEL: test1
16+
; FSRM: # %bb.0:
17+
; FSRM-NEXT: movq %rdx, %rcx
18+
; FSRM-NEXT: rep;movsb (%rsi), %es:(%rdi)
19+
; FSRM-NEXT: retq
20+
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %s, i1 0)
21+
ret void
22+
}
23+
24+
; Check that we don't crash due to a memcpy size type mismatch error ("Cannot
25+
; emit physreg copy instruction") in X86InstrInfo::copyPhysReg.
26+
%struct = type { [4096 x i8] }
27+
declare void @foo(%struct* byval)
28+
define void @test2(%struct* %x) {
29+
call void @foo(%struct* byval %x)
30+
ret void
31+
}

0 commit comments

Comments
 (0)