diff --git a/llvm/Makefile b/llvm/Makefile index 86e5208e9..e9ab6e4af 100644 --- a/llvm/Makefile +++ b/llvm/Makefile @@ -1,5 +1,5 @@ # LLVM version. -VER=9.0.0 +VER=10.0.0 ROOT_DIR=$(shell pwd) @@ -21,7 +21,7 @@ llvm-${VER}.src: | llvm-${VER}.src.tar.xz tar -xJf llvm-${VER}.src.tar.xz llvm-${VER}.src.tar.xz: - wget -O $@ "https://releases.llvm.org/${VER}/llvm-${VER}.src.tar.xz" + wget -O $@ "https://github.com/llvm/llvm-project/releases/download/llvmorg-${VER}/llvm-${VER}.src.tar.xz" touch $@ clean: diff --git a/llvm/test/Analysis/BasicAA/assume-index-positive.ll b/llvm/test/Analysis/BasicAA/assume-index-positive.ll new file mode 100644 index 000000000..d89738a23 --- /dev/null +++ b/llvm/test/Analysis/BasicAA/assume-index-positive.ll @@ -0,0 +1,116 @@ +; RUN: opt -basicaa -aa-eval -print-all-alias-modref-info %s 2>&1 | FileCheck %s + +; %col.ptr.1 and %col.ptr.2 do not alias, if we know that %skip >= 0, because +; the distance between %col.ptr.1 and %col.ptr.2 is %skip + 6 and we load 6 +; elements. 
+define void @test1(double* %ptr, i32 %skip) { +; CHECK-LABEL: Function: test1: 4 pointers, 1 call sites +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.1, double* %ptr +; CHECK-NEXT: NoAlias: double* %col.ptr.2, double* %ptr +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.1, double* %col.ptr.2 +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.2.cast, double* %ptr +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.1, <6 x double>* %col.ptr.2.cast +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.2.cast, double* %col.ptr.2 +; CHECK-NEXT: NoModRef: Ptr: double* %ptr <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.1 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: double* %col.ptr.2 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.2.cast <-> call void @llvm.assume(i1 %gt) +; + %gt = icmp sgt i32 %skip, -1 + call void @llvm.assume(i1 %gt) + %stride = add nsw nuw i32 %skip, 6 + %col.ptr.1 = bitcast double* %ptr to <6 x double>* + %lv.1 = load <6 x double>, <6 x double>* %col.ptr.1, align 8 + %col.ptr.2= getelementptr double, double* %ptr, i32 %stride + %col.ptr.2.cast = bitcast double* %col.ptr.2 to <6 x double>* + %lv.2 = load <6 x double>, <6 x double>* %col.ptr.2.cast, align 8 + %res.1 = fadd <6 x double> %lv.1, %lv.1 + %res.2 = fadd <6 x double> %lv.2, %lv.2 + store <6 x double> %res.1, <6 x double>* %col.ptr.1, align 8 + store <6 x double> %res.2, <6 x double>* %col.ptr.2.cast, align 8 + ret void +} + +; Same as @test1, but now we do not have an assume guaranteeing %skip >= 0. 
+define void @test2(double* %ptr, i32 %skip) { +; CHECK-LABEL: Function: test2: 4 pointers, 0 call sites +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.1, double* %ptr +; CHECK-NEXT: MayAlias: double* %col.ptr.2, double* %ptr +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.1, double* %col.ptr.2 +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.2.cast, double* %ptr +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.1, <6 x double>* %col.ptr.2.cast +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.2.cast, double* %col.ptr.2 +; + %stride = add nsw nuw i32 %skip, 6 + %col.ptr.1 = bitcast double* %ptr to <6 x double>* + %lv.1 = load <6 x double>, <6 x double>* %col.ptr.1, align 8 + %col.ptr.2= getelementptr double, double* %ptr, i32 %stride + %col.ptr.2.cast = bitcast double* %col.ptr.2 to <6 x double>* + %lv.2 = load <6 x double>, <6 x double>* %col.ptr.2.cast, align 8 + %res.1 = fadd <6 x double> %lv.1, %lv.1 + %res.2 = fadd <6 x double> %lv.2, %lv.2 + store <6 x double> %res.1, <6 x double>* %col.ptr.1, align 8 + store <6 x double> %res.2, <6 x double>* %col.ptr.2.cast, align 8 + ret void +} + +; Same as @test1, but the assume just guarantees %skip > -3, which is not +; enough to derive NoAlias +define void @test3(double* %ptr, i32 %skip) { +; CHECK-LABEL: Function: test3: 4 pointers, 1 call sites +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.1, double* %ptr +; CHECK-NEXT: MayAlias: double* %col.ptr.2, double* %ptr +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.1, double* %col.ptr.2 +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.2.cast, double* %ptr +; CHECK-NEXT: MayAlias: <6 x double>* %col.ptr.1, <6 x double>* %col.ptr.2.cast +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.2.cast, double* %col.ptr.2 +; CHECK-NEXT: NoModRef: Ptr: double* %ptr <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.1 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: double* %col.ptr.2 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: 
NoModRef: Ptr: <6 x double>* %col.ptr.2.cast <-> call void @llvm.assume(i1 %gt) +; + %gt = icmp sgt i32 %skip, -3 + call void @llvm.assume(i1 %gt) + %stride = add nsw nuw i32 %skip, 6 + %col.ptr.1 = bitcast double* %ptr to <6 x double>* + %lv.1 = load <6 x double>, <6 x double>* %col.ptr.1, align 8 + %col.ptr.2= getelementptr double, double* %ptr, i32 %stride + %col.ptr.2.cast = bitcast double* %col.ptr.2 to <6 x double>* + %lv.2 = load <6 x double>, <6 x double>* %col.ptr.2.cast, align 8 + %res.1 = fadd <6 x double> %lv.1, %lv.1 + %res.2 = fadd <6 x double> %lv.2, %lv.2 + store <6 x double> %res.1, <6 x double>* %col.ptr.1, align 8 + store <6 x double> %res.2, <6 x double>* %col.ptr.2.cast, align 8 + ret void +} + +; Same as @test1, but the assume uses the sge predicate for %skip >= 0. +define void @test4(double* %ptr, i32 %skip) { +; CHECK-LABEL: Function: test4: 4 pointers, 1 call sites +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.1, double* %ptr +; CHECK-NEXT: NoAlias: double* %col.ptr.2, double* %ptr +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.1, double* %col.ptr.2 +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.2.cast, double* %ptr +; CHECK-NEXT: NoAlias: <6 x double>* %col.ptr.1, <6 x double>* %col.ptr.2.cast +; CHECK-NEXT: MustAlias: <6 x double>* %col.ptr.2.cast, double* %col.ptr.2 +; CHECK-NEXT: NoModRef: Ptr: double* %ptr <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.1 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: double* %col.ptr.2 <-> call void @llvm.assume(i1 %gt) +; CHECK-NEXT: NoModRef: Ptr: <6 x double>* %col.ptr.2.cast <-> call void @llvm.assume(i1 %gt) +; + %gt = icmp sge i32 %skip, 0 + call void @llvm.assume(i1 %gt) + %stride = add nsw nuw i32 %skip, 6 + %col.ptr.1 = bitcast double* %ptr to <6 x double>* + %lv.1 = load <6 x double>, <6 x double>* %col.ptr.1, align 8 + %col.ptr.2= getelementptr double, double* %ptr, i32 %stride + %col.ptr.2.cast = bitcast double* %col.ptr.2 to <6 
x double>* + %lv.2 = load <6 x double>, <6 x double>* %col.ptr.2.cast, align 8 + %res.1 = fadd <6 x double> %lv.1, %lv.1 + %res.2 = fadd <6 x double> %lv.2, %lv.2 + store <6 x double> %res.1, <6 x double>* %col.ptr.1, align 8 + store <6 x double> %res.2, <6 x double>* %col.ptr.2.cast, align 8 + ret void +} + +declare void @llvm.assume(i1 %cond) diff --git a/llvm/test/Analysis/BasicAA/cs-cs.ll b/llvm/test/Analysis/BasicAA/cs-cs.ll index 98899993d..beb9eaa83 100644 --- a/llvm/test/Analysis/BasicAA/cs-cs.ll +++ b/llvm/test/Analysis/BasicAA/cs-cs.ll @@ -364,26 +364,26 @@ entry: call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] ret void -; CHECK: Just Ref: Ptr: i8* %p <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] -; CHECK: Just Ref: Ptr: i8* %q <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] -; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: NoModRef: Ptr: i8* %q <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] -; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] -; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] -; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #6 [ 
"unknown"() ] -; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] -; CHECK: NoModRef: call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: Both ModRef (MustAlias): call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] -; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #6 [ "unknown"() ] -; CHECK: NoModRef: call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #7 [ "unknown"() ] -; CHECK: Both ModRef (MustAlias): call void @an_argmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #8 [ "unknown"() ] +; CHECK: Just Ref: Ptr: i8* %p <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: Just Ref: Ptr: i8* %q <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: NoModRef: Ptr: i8* %q <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: NoModRef: Ptr: i8* %p <-> call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void 
@an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: Just Ref: call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] +; CHECK: NoModRef: call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] <-> call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] +; CHECK: Both ModRef: call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] <-> call void @a_readonly_func(i8* %p) #7 [ "unknown"() ] +; CHECK: NoModRef: call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] <-> call void @an_inaccessiblememonly_func() #8 [ "unknown"() ] +; CHECK: Both ModRef (MustAlias): call void @an_argmemonly_func(i8* %q) #10 [ "unknown"() ] <-> call void @an_inaccessibleorargmemonly_func(i8* %q) #9 [ "unknown"() ] } attributes #0 = { argmemonly nounwind } diff --git a/llvm/test/Analysis/BasicAA/dereferenceable.ll b/llvm/test/Analysis/BasicAA/dereferenceable.ll new file 
mode 100644 index 000000000..efc9addbe --- /dev/null +++ b/llvm/test/Analysis/BasicAA/dereferenceable.ll @@ -0,0 +1,149 @@ +; RUN: opt -basicaa -print-all-alias-modref-info -aa-eval -analyze < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@G = global i32 0, align 4 + +define i64 @global_and_deref_arg_1(i64* dereferenceable(8) %arg) { +; CHECK: Function: global_and_deref_arg_1: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* @G, i64* %arg +bb: + store i64 1, i64* %arg, align 8 + store i32 0, i32* @G, align 4 + %tmp = load i64, i64* %arg, align 8 + ret i64 %tmp +} + +define i32 @global_and_deref_arg_2(i32* dereferenceable(8) %arg) { +; CHECK: Function: global_and_deref_arg_2: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* %arg, i32* @G +bb: + store i32 1, i32* %arg, align 8 + store i32 0, i32* @G, align 4 + %tmp = load i32, i32* %arg, align 8 + ret i32 %tmp +} + +define i32 @byval_and_deref_arg_1(i32* byval %obj, i64* dereferenceable(8) %arg) { +; CHECK: Function: byval_and_deref_arg_1: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i64* %arg +bb: + store i32 1, i32* %obj, align 4 + store i64 0, i64* %arg, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +define i32 @byval_and_deref_arg_2(i32* byval %obj, i32* dereferenceable(8) %arg) { +; CHECK: Function: byval_and_deref_arg_2: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* %arg, i32* %obj +bb: + store i32 1, i32* %obj, align 4 + store i32 0, i32* %arg, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +declare dereferenceable(8) i32* @get_i32_deref8() +declare dereferenceable(8) i64* @get_i64_deref8() +declare void @unknown(i32*) + +define i32 @local_and_deref_ret_1() { +; CHECK: Function: local_and_deref_ret_1: 2 pointers, 2 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i64* %ret +bb: + %obj = alloca i32 + call void @unknown(i32* %obj) + %ret = call dereferenceable(8) i64* @get_i64_deref8() + store i32 1, 
i32* %obj, align 4 + store i64 0, i64* %ret, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +define i32 @local_and_deref_ret_2() { +; CHECK: Function: local_and_deref_ret_2: 2 pointers, 2 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i32* %ret +bb: + %obj = alloca i32 + call void @unknown(i32* %obj) + %ret = call dereferenceable(8) i32* @get_i32_deref8() + store i32 1, i32* %obj, align 4 + store i32 0, i32* %ret, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + + +; Baseline tests, same as above but with 2 instead of 8 dereferenceable bytes. + +define i64 @global_and_deref_arg_non_deref_1(i64* dereferenceable(2) %arg) { +; CHECK: Function: global_and_deref_arg_non_deref_1: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* @G, i64* %arg +bb: + store i64 1, i64* %arg, align 8 + store i32 0, i32* @G, align 4 + %tmp = load i64, i64* %arg, align 8 + ret i64 %tmp +} + +define i32 @global_and_deref_arg_non_deref_2(i32* dereferenceable(2) %arg) { +; CHECK: Function: global_and_deref_arg_non_deref_2: 2 pointers, 0 call sites +; Different result than above (see @global_and_deref_arg_2). 
+; CHECK-NEXT: MayAlias: i32* %arg, i32* @G +bb: + store i32 1, i32* %arg, align 8 + store i32 0, i32* @G, align 4 + %tmp = load i32, i32* %arg, align 8 + ret i32 %tmp +} + +define i32 @byval_and_deref_arg_non_deref_1(i32* byval %obj, i64* dereferenceable(2) %arg) { +; CHECK: Function: byval_and_deref_arg_non_deref_1: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i64* %arg +bb: + store i32 1, i32* %obj, align 4 + store i64 0, i64* %arg, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +define i32 @byval_and_deref_arg_non_deref_2(i32* byval %obj, i32* dereferenceable(2) %arg) { +; CHECK: Function: byval_and_deref_arg_non_deref_2: 2 pointers, 0 call sites +; CHECK-NEXT: NoAlias: i32* %arg, i32* %obj +bb: + store i32 1, i32* %obj, align 4 + store i32 0, i32* %arg, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +declare dereferenceable(2) i32* @get_i32_deref2() +declare dereferenceable(2) i64* @get_i64_deref2() + +define i32 @local_and_deref_ret_non_deref_1() { +; CHECK: Function: local_and_deref_ret_non_deref_1: 2 pointers, 2 call sites +; CHECK-NEXT: NoAlias: i32* %obj, i64* %ret +bb: + %obj = alloca i32 + call void @unknown(i32* %obj) + %ret = call dereferenceable(2) i64* @get_i64_deref2() + store i32 1, i32* %obj, align 4 + store i64 0, i64* %ret, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} + +define i32 @local_and_deref_ret_non_deref_2() { +; CHECK: Function: local_and_deref_ret_non_deref_2: 2 pointers, 2 call sites +; Different result than above (see @local_and_deref_ret_2). 
+; CHECK-NEXT: MayAlias: i32* %obj, i32* %ret +bb: + %obj = alloca i32 + call void @unknown(i32* %obj) + %ret = call dereferenceable(2) i32* @get_i32_deref2() + store i32 1, i32* %obj, align 4 + store i32 0, i32* %ret, align 8 + %tmp = load i32, i32* %obj, align 4 + ret i32 %tmp +} diff --git a/llvm/test/Analysis/BasicAA/gep-alias.ll b/llvm/test/Analysis/BasicAA/gep-alias.ll index 1e435af2f..5fd77e19e 100644 --- a/llvm/test/Analysis/BasicAA/gep-alias.ll +++ b/llvm/test/Analysis/BasicAA/gep-alias.ll @@ -247,7 +247,7 @@ define i32 @test12(i32 %x, i32 %y, i8* %p) nounwind { ; CHECK: [[U0ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds [3 x i8], [3 x i8]* %u, i32 0, i32 0 ; CHECK: [[U0:%[a-zA-Z0-9_]+]] = load i8, i8* [[U0ADDR]], align 1 ; CHECK: [[U0ARG:%[a-zA-Z0-9_]+]] = zext i8 [[U0]] to i32 -; CHECK: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 [[T0ARG]], i32 [[U0ARG]]) +; CHECK: call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 [[T0ARG]], i32 [[U0ARG]]) ; CHECK: ret define void @test13() { entry: diff --git a/llvm/test/Analysis/BasicAA/intrinsics.ll b/llvm/test/Analysis/BasicAA/intrinsics.ll index 68e59862b..cf792e8c6 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics.ll @@ -22,6 +22,6 @@ entry: declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) nounwind readonly declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind -; CHECK: attributes #0 = { argmemonly nounwind readonly } -; CHECK: attributes #1 = { argmemonly nounwind } +; CHECK: attributes #0 = { argmemonly nounwind readonly willreturn } +; CHECK: attributes #1 = { argmemonly nounwind willreturn } ; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/llvm/test/Analysis/BasicAA/pr18573.ll.golden b/llvm/test/Analysis/BasicAA/pr18573.ll.golden index 
5567af365..bce732666 100644 --- a/llvm/test/Analysis/BasicAA/pr18573.ll.golden +++ b/llvm/test/Analysis/BasicAA/pr18573.ll.golden @@ -1,7 +1,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) #0 +declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %0, i8* %1, <8 x i32> %2, <8 x float> %3, i8 %4) #0 define <8 x float> @foo1(i8* noalias readonly %arr.ptr, <8 x i32>* noalias readonly %vix.ptr, i8* noalias %t2.ptr) #1 { allocas: diff --git a/llvm/test/Analysis/BasicAA/ptrmask.ll b/llvm/test/Analysis/BasicAA/ptrmask.ll new file mode 100644 index 000000000..27c14ebb7 --- /dev/null +++ b/llvm/test/Analysis/BasicAA/ptrmask.ll @@ -0,0 +1,29 @@ +; RUN: opt -basicaa -aa-eval -print-no-aliases -disable-output %s 2>&1 | FileCheck %s + +%struct = type <{ [20 x i64] }> + +; CHECK-LABEL: Function: test_noalias: 4 pointers, 1 call sites +; CHECK-NEXT: NoAlias: %struct* %ptr1, i64* %ptr2 +; CHECK-NEXT: NoAlias: %struct* %addr.ptr, i64* %ptr2 +; CHECK-NEXT: NoAlias: i64* %gep, i64* %ptr2 +define void @test_noalias(%struct* noalias %ptr1, i64* %ptr2, i64 %offset) { +entry: + %addr.ptr = call %struct* @llvm.ptrmask.p0s_struct.p0s.struct.i64(%struct* %ptr1, i64 72057594037927928) + store i64 10, i64* %ptr2 + %gep = getelementptr inbounds %struct, %struct* %addr.ptr, i64 0, i32 0, i64 %offset + store i64 1, i64* %gep, align 8 + ret void +} + +; CHECK-NEXT: Function: test_alias: 4 pointers, 1 call sites +; CHECK-NOT: NoAlias +define void @test_alias(%struct* %ptr1, i64* %ptr2, i64 %offset) { +entry: + %addr.ptr = call %struct* @llvm.ptrmask.p0s_struct.p0s.struct.i64(%struct* %ptr1, i64 72057594037927928) + store i64 10, i64* %ptr2 + %gep = getelementptr inbounds %struct, %struct* %addr.ptr, i64 0, i32 0, i64 %offset + store i64 1, i64* %gep, align 8 + ret void +} + +declare %struct* 
@llvm.ptrmask.p0s_struct.p0s.struct.i64(%struct*, i64) diff --git a/llvm/test/Analysis/BasicAA/store-promote.ll b/llvm/test/Analysis/BasicAA/store-promote.ll index afe11c2a1..23b74bc1a 100644 --- a/llvm/test/Analysis/BasicAA/store-promote.ll +++ b/llvm/test/Analysis/BasicAA/store-promote.ll @@ -1,8 +1,9 @@ -; Test that LICM uses basicaa to do alias analysis, which is capable of +; Test that LICM uses basicaa to do alias analysis, which is capable of ; disambiguating some obvious cases. If LICM is able to disambiguate the ; two pointers, then the load should be hoisted, and the store sunk. -; RUN: opt < %s -basicaa -licm -S | FileCheck %s +; RUN: opt < %s -basicaa -licm -enable-mssa-loop-dependency=false -S | FileCheck %s -check-prefixes=CHECK,AST +; RUN: opt < %s -basicaa -licm -enable-mssa-loop-dependency=true -S | FileCheck %s -check-prefixes=CHECK,MSSA target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @A = global i32 7 ; [#uses=3] @@ -25,10 +26,13 @@ Out: ; preds = %Loop ; The Loop block should be empty after the load/store are promoted. 
; CHECK: @test1 ; CHECK: load i32, i32* @A +; MSSA: load i32, i32* @A +; MSSA: store i32 %Atmp, i32* @B ; CHECK: Loop: ; CHECK-NEXT: br i1 %c, label %Out, label %Loop ; CHECK: Out: -; CHECK: store i32 %Atmp, i32* @B +; AST: store i32 %Atmp, i32* @B +; AST: load i32, i32* @A } define i32 @test2(i1 %c) { diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll index 64e0a8245..8212cc476 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll @@ -141,6 +141,24 @@ exit: ret i32 %result } +define i32 @test_cold_loop(i32 %a, i32 %b) { +entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit + +header: + br label %body + +body: + %cond2 = icmp eq i32 %b, 42 + br i1 %cond2, label %header, label %exit +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% + +exit: + call void @coldfunc() + ret i32 %b +} + declare i32 @regular_function(i32 %i) define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) { diff --git a/llvm/test/Analysis/BranchProbabilityInfo/fcmp.ll b/llvm/test/Analysis/BranchProbabilityInfo/fcmp.ll new file mode 100644 index 000000000..8089916fb --- /dev/null +++ b/llvm/test/Analysis/BranchProbabilityInfo/fcmp.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -analyze -branch-prob | FileCheck %s + +; This function tests the floating point unorder comparison. The probability +; of NaN should be extremely small. +; CHECK: Printing analysis 'Branch Probability Analysis' for function 'uno' +; CHECK: edge -> a probability is 0x00000800 / 0x80000000 = 0.00% +; CHECK: edge -> b probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] + +define void @uno(float %val1, float %val2) { + %cond = fcmp uno float %val1, %val2 + br i1 %cond, label %a, label %b + +a: + call void @fa() + ret void + +b: + call void @fb() + ret void +} + +; This function tests the floating point order comparison. 
+; CHECK: Printing analysis 'Branch Probability Analysis' for function 'ord' +; CHECK: edge -> a probability is 0x7ffff800 / 0x80000000 = 100.00% [HOT edge] +; CHECK: edge -> b probability is 0x00000800 / 0x80000000 = 0.00% + +define void @ord(float %val1, float %val2) { + %cond = fcmp ord float %val1, %val2 + br i1 %cond, label %a, label %b + +a: + call void @fa() + ret void + +b: + call void @fb() + ret void +} + +declare void @fa() +declare void @fb() diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll index 0566ca16c..6e01afd2c 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll @@ -79,6 +79,32 @@ exit: ret i32 %b } +define i32 @test4(i32 %a, i32 %b) { +; CHECK: Printing analysis {{.*}} for function 'test4' +; Make sure we handle loops post-dominated by unreachables. +entry: + %cond1 = icmp eq i32 %a, 42 + br i1 %cond1, label %header, label %exit +; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] + +header: + br label %body + +body: + %cond2 = icmp eq i32 %a, 42 + br i1 %cond2, label %header, label %abort +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% +; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% + +abort: + call void @abort() noreturn + unreachable + +exit: + ret i32 %b +} + @_ZTIi = external global i8* ; CHECK-LABEL: throwSmallException diff --git a/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll b/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll index 66ac89793..8674f9e5a 100644 --- a/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll +++ b/llvm/test/Analysis/BranchProbabilityInfo/pr22718.ll @@ -73,9 +73,9 @@ declare i32 @rand() #1 declare i32 @printf(i8*, ...) 
#2 -attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { nounwind } !llvm.ident = !{!0} diff --git a/llvm/test/Analysis/ConstantFolding/binop-identity-undef.ll b/llvm/test/Analysis/ConstantFolding/binop-identity-undef.ll new file mode 100644 index 000000000..683078921 
--- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/binop-identity-undef.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -constprop -S %s | FileCheck %s + +define i32 @and1() { +; CHECK-LABEL: @and1( +; CHECK-NEXT: ret i32 undef +; + %r = and i32 undef, -1 + ret i32 %r +} + +define i32 @and2() { +; CHECK-LABEL: @and2( +; CHECK-NEXT: ret i32 undef +; + %r = and i32 -1, undef + ret i32 %r +} + +define i32 @and3_no_identity() { +; CHECK-LABEL: @and3_no_identity( +; CHECK-NEXT: ret i32 0 +; + %r = and i32 10, undef + ret i32 %r +} + +define i32 @or1() { +; CHECK-LABEL: @or1( +; CHECK-NEXT: ret i32 undef +; + %r = or i32 0, undef + ret i32 %r +} + +define i32 @or2() { +; CHECK-LABEL: @or2( +; CHECK-NEXT: ret i32 undef +; + %r = or i32 undef, 0 + ret i32 %r +} + +define i32 @or3_no_identity() { +; CHECK-LABEL: @or3_no_identity( +; CHECK-NEXT: ret i32 -1 +; + %r = or i32 undef, 10 + ret i32 %r +} diff --git a/llvm/test/Analysis/ConstantFolding/copysign.ll b/llvm/test/Analysis/ConstantFolding/copysign.ll new file mode 100644 index 000000000..228ffcb47 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/copysign.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -constprop < %s | FileCheck %s + +declare float @llvm.copysign.f32(float, float) +declare double @llvm.copysign.f64(double, double) + +define float @f32_01() { +; CHECK-LABEL: @f32_01( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.copysign.f32(float 1.0, float -2.0) + ret float %x +} + +define float @f32_02() { +; CHECK-LABEL: @f32_02( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @llvm.copysign.f32(float -2.0, float 1.0) + ret float %x +} + +define float @f32_03() { +; CHECK-LABEL: @f32_03( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.copysign.f32(float -2.0, float -1.0) + ret float %x +} + +define double @f64_01() { +; CHECK-LABEL: 
@f64_01( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.copysign.f64(double 1.0, double -2.0) + ret double %x +} + +define double @f64_02() { +; CHECK-LABEL: @f64_02( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @llvm.copysign.f64(double -1.0, double 2.0) + ret double %x +} + +define double @f64_03() { +; CHECK-LABEL: @f64_03( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.copysign.f64(double -1.0, double -2.0) + ret double %x +} diff --git a/llvm/test/Analysis/ConstantFolding/gep-alias.ll b/llvm/test/Analysis/ConstantFolding/gep-alias.ll new file mode 100644 index 000000000..0fcc778a4 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/gep-alias.ll @@ -0,0 +1,17 @@ +; RUN: opt -instcombine -S -o - %s | FileCheck %s +; Test that we don't replace an alias with its aliasee when simplifying GEPs. +; In this test case the transformation is invalid because it replaces the +; reference to the symbol "b" (which refers to whichever instance of "b" +; was chosen by the linker) with a reference to "a" (which refers to the +; specific instance of "b" in this module). 
+ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = internal global [3 x i8*] zeroinitializer +@b = linkonce_odr alias [3 x i8*], [3 x i8*]* @a + +define i8** @f() { + ; CHECK: ret i8** getelementptr ([3 x i8*], [3 x i8*]* @b, i64 0, i64 1) + ret i8** getelementptr ([3 x i8*], [3 x i8*]* @b, i64 0, i64 1) +} diff --git a/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll b/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll index bb5fcbdb8..03d27e9fb 100644 --- a/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll +++ b/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll @@ -9,7 +9,7 @@ define <2 x i16*> @test_gep() { ; CHECK-LABEL: @test_gep( -; CHECK-NEXT: ret <2 x i16*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)> +; CHECK-NEXT: ret <2 x i16*> ; %A = getelementptr [1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer %B = bitcast <2 x %rec8*> %A to <2 x i16*> diff --git a/llvm/test/Analysis/ConstantFolding/insertelement.ll b/llvm/test/Analysis/ConstantFolding/insertelement.ll new file mode 100644 index 000000000..960042acf --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/insertelement.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -constprop -S | FileCheck %s + + +define <4 x i32> @insertelement_fixedlength_constant() { +; CHECK-LABEL: @insertelement_fixedlength_constant( +; CHECK-NEXT: ret <4 x i32> +; + %i = insertelement <4 x i32> undef, i32 1, i32 0 + ret <4 x i32> %i +} + +define @insertelement_scalable_constant() { +; CHECK-LABEL: @insertelement_scalable_constant( +; CHECK-NEXT: ret insertelement ( 
undef, i32 1, i32 0) +; + %i = insertelement undef, i32 1, i32 0 + ret %i +} diff --git a/llvm/test/Analysis/ConstantFolding/math-1.ll b/llvm/test/Analysis/ConstantFolding/math-1.ll new file mode 100644 index 000000000..595095017 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/math-1.ll @@ -0,0 +1,195 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -early-cse -S -o - %s | FileCheck %s + +declare double @acos(double) +define double @f_acos() { +; CHECK-LABEL: @f_acos( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @acos(double 1.0) + ret double %res +} + +declare float @asinf(float) +define float @f_asinf() { +; CHECK-LABEL: @f_asinf( +; CHECK-NEXT: ret float 0x3FF921FB{{.+}} +; + %res = tail call fast float @asinf(float 1.0) + ret float %res +} + +declare double @atan(double) +define double @f_atan() { +; CHECK-LABEL: @f_atan( +; CHECK-NEXT: [[RES:%.*]] = tail call fast double @atan(double 1.000000e+00) +; CHECK-NEXT: ret double 0x3FE921FB +; + %res = tail call fast double @atan(double 1.0) + ret double %res +} + +declare float @cosf(float) +define float @f_cosf() { +; CHECK-LABEL: @f_cosf( +; CHECK-NEXT: ret float 0x3FE14A2{{.+}} +; + %res = tail call fast float @cosf(float 1.0) + ret float %res +} + +declare float @llvm.cos.f32(float) +define float @i_cosf() { +; CHECK-LABEL: @i_cosf( +; CHECK-NEXT: ret float 0x3FE14A2 +; + %res = tail call fast float @llvm.cos.f32(float 1.0) + ret float %res +} + +declare double @cosh(double) +define double @f_cosh() { +; CHECK-LABEL: @f_cosh( +; CHECK-NEXT: ret double 0x3FF8B075{{.+}} +; + %res = tail call fast double @cosh(double 1.0) + ret double %res +} + +declare float @expf(float) +define float @f_expf() { +; CHECK-LABEL: @f_expf( +; CHECK-NEXT: ret float 0x4005BF0A{{.+}} +; + %res = tail call fast float @expf(float 1.0) + ret float %res +} + +declare float @llvm.exp.f32(float) +define float @i_expf() { +; CHECK-LABEL: @i_expf( +; CHECK-NEXT: ret 
float 0x4005BF0A{{.+}} +; + %res = tail call fast float @llvm.exp.f32(float 1.0) + ret float %res +} + +declare double @exp2(double) +define double @f_exp2() { +; CHECK-LABEL: @f_exp2( +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = tail call fast double @exp2(double 1.0) + ret double %res +} + +declare double @llvm.exp2.f64(double) +define double @i_exp2() { +; CHECK-LABEL: @i_exp2( +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = tail call fast double @llvm.exp2.f64(double 1.0) + ret double %res +} + +; FIXME: exp10() is not widely supported. +declare float @exp10f(float) +define float @f_exp10f() { +; CHECK-LABEL: @f_exp10f( +; CHECK-NEXT: [[RES:%.*]] = tail call float @exp10f(float 1.000000e+00) +; CHECK-NEXT: ret float [[RES]] +; + %res = tail call float @exp10f(float 1.0) + ret float %res +} + +declare double @log(double) +define double @f_log() { +; CHECK-LABEL: @f_log( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @log(double 1.0) + ret double %res +} + +declare double @llvm.log.f64(double) +define double @i_log() { +; CHECK-LABEL: @i_log( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @llvm.log.f64(double 1.0) + ret double %res +} + +declare float @log2f(float) +define float @f_log2f() { +; CHECK-LABEL: @f_log2f( +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = tail call fast float @log2f(float 1.0) + ret float %res +} + +declare float @llvm.log2.f32(float) +define float @i_log2f() { +; CHECK-LABEL: @i_log2f( +; CHECK-NEXT: ret float 0.000000e+00 +; + %res = tail call fast float @llvm.log2.f32(float 1.0) + ret float %res +} + +declare double @log10(double) +define double @f_log10() { +; CHECK-LABEL: @f_log10( +; CHECK-NEXT: ret double 0.000000e+00 +; + %res = tail call fast double @log10(double 1.0) + ret double %res +} + +declare float @sinf(float) +define float @f_sinf() { +; CHECK-LABEL: @f_sinf( +; CHECK-NEXT: ret float 0x3FEAED54{{.+}} +; + %res = tail call fast float @sinf(float 1.0) + ret 
float %res +} + +declare double @sinh(double) +define double @f_sinh() { +; CHECK-LABEL: @f_sinh( +; CHECK-NEXT: ret double 0x3FF2CD9F{{.+}} +; + %res = tail call fast double @sinh(double 1.0) + ret double %res +} + +declare float @sqrtf(float) +define float @f_sqrtf() { +; CHECK-LABEL: @f_sqrtf( +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = tail call fast float @sqrtf(float 1.0) + ret float %res +} + +declare double @tan(double) +define double @f_tan() { +; CHECK-LABEL: @f_tan( +; CHECK-NEXT: ret double 0x3FF8EB24{{.+}} +; + %res = tail call fast double @tan(double 1.0) + ret double %res +} + +declare float @tanhf(float) +define float @f_tanhf() { +; CHECK-LABEL: @f_tanhf( +; CHECK-NEXT: [[RES:%.*]] = tail call fast float @tanhf(float 1.000000e+00) +; CHECK-NEXT: ret float 0x3FE85EFA{{.+}} +; + %res = tail call fast float @tanhf(float 1.0) + ret float %res +} diff --git a/llvm/test/Analysis/ConstantFolding/math-2.ll b/llvm/test/Analysis/ConstantFolding/math-2.ll new file mode 100644 index 000000000..90b64797f --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/math-2.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -early-cse -S -o - %s | FileCheck %s + +declare double @atan2(double, double) +define double @f_atan2() { +; CHECK-LABEL: @f_atan2( +; CHECK-NEXT: [[RES:%.*]] = tail call fast double @atan2(double 1.000000e+00, double 2.000000e+00) +; CHECK-NEXT: ret double 0x3FDDAC6{{.+}} +; + %res = tail call fast double @atan2(double 1.0, double 2.0) + ret double %res +} + +declare float @fmodf(float, float) +define float @f_fmodf() { +; CHECK-LABEL: @f_fmodf( +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = tail call fast float @fmodf(float 1.0, float 2.0) + ret float %res +} + +declare double @pow(double, double) +define double @f_pow() { +; CHECK-LABEL: @f_pow( +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = tail call fast double @pow(double 1.0, double 2.0) + ret double %res +} + +declare float 
@llvm.pow.f32(float, float) +define float @i_powf() { +; CHECK-LABEL: @i_powf( +; CHECK-NEXT: ret float 1.000000e+00 +; + %res = tail call fast float @llvm.pow.f32(float 1.0, float 2.0) + ret float %res +} + +declare double @llvm.powi.f64(double, i32) +define double @i_powi() { +; CHECK-LABEL: @i_powi( +; CHECK-NEXT: ret double 1.000000e+00 +; + %res = tail call fast double @llvm.powi.f64(double 1.0, i32 2) + ret double %res +} diff --git a/llvm/test/Analysis/ConstantFolding/rint.ll b/llvm/test/Analysis/ConstantFolding/rint.ll new file mode 100644 index 000000000..9ad794d60 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/rint.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse < %s | FileCheck %s + +declare float @nearbyintf(float) #0 +declare float @llvm.nearbyint.f32(float) #0 +declare double @nearbyint(double) #0 +declare double @llvm.nearbyint.f64(double) #0 +declare float @rintf(float) #0 +declare float @llvm.rint.f32(float) #0 +declare double @rint(double) #0 +declare double @llvm.rint.f64(double) #0 + +define float @constant_fold_rint_f32_01() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_01( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @nearbyintf(float 1.25) #0 + ret float %x +} + +define float @constant_fold_rint_f32_02() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_02( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.nearbyint.f32(float -1.25) #0 + ret float %x +} + +define float @constant_fold_rint_f32_03() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_03( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @rintf(float 1.5) #0 + ret float %x +} + +define float @constant_fold_rint_f32_04() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_04( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.rint.f32(float -1.5) #0 + ret float %x +} + +define float @constant_fold_rint_f32_05() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_05( +; 
CHECK-NEXT: ret float 3.000000e+00 +; + %x = call float @nearbyintf(float 2.75) #0 + ret float %x +} + +define float @constant_fold_rint_f32_06() #0 { +; CHECK-LABEL: @constant_fold_rint_f32_06( +; CHECK-NEXT: ret float -3.000000e+00 +; + %x = call float @llvm.nearbyint.f32(float -2.75) #0 + ret float %x +} + +define double @constant_fold_rint_f64_01() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_01( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @rint(double 1.3) #0 + ret double %x +} + +define double @constant_fold_rint_f64_02() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_02( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.rint.f64(double -1.3) #0 + ret double %x +} + +define double @constant_fold_rint_f64_03() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_03( +; CHECK-NEXT: ret double 2.000000e+00 +; + %x = call double @nearbyint(double 1.5) #0 + ret double %x +} + +define double @constant_fold_rint_f64_04() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_04( +; CHECK-NEXT: ret double -2.000000e+00 +; + %x = call double @llvm.nearbyint.f64(double -1.5) #0 + ret double %x +} + +define double @constant_fold_rint_f64_05() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_05( +; CHECK-NEXT: ret double 3.000000e+00 +; + %x = call double @rint(double 2.7) #0 + ret double %x +} + +define double @constant_fold_rint_f64_06() #0 { +; CHECK-LABEL: @constant_fold_rint_f64_06( +; CHECK-NEXT: ret double -3.000000e+00 +; + %x = call double @llvm.rint.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Analysis/ConstantFolding/round.ll b/llvm/test/Analysis/ConstantFolding/round.ll new file mode 100644 index 000000000..d5b847810 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/round.ll @@ -0,0 +1,92 @@ +; RUN: opt -S -early-cse < %s | FileCheck %s + +declare float @roundf(float) #0 +declare float @llvm.round.f32(float) #0 +declare double @round(double) #0 +declare double @llvm.round.f64(double) #0 
+ +; CHECK-LABEL: @constant_fold_round_f32_01 +; CHECK-NEXT: ret float 1.000000e+00 +define float @constant_fold_round_f32_01() #0 { + %x = call float @roundf(float 1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_02 +; CHECK-NEXT: ret float -1.000000e+00 +define float @constant_fold_round_f32_02() #0 { + %x = call float @llvm.round.f32(float -1.25) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_03 +; CHECK-NEXT: ret float 2.000000e+00 +define float @constant_fold_round_f32_03() #0 { + %x = call float @roundf(float 1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_04 +; CHECK-NEXT: ret float -2.000000e+00 +define float @constant_fold_round_f32_04() #0 { + %x = call float @llvm.round.f32(float -1.5) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_05 +; CHECK-NEXT: ret float 3.000000e+00 +define float @constant_fold_round_f32_05() #0 { + %x = call float @roundf(float 2.75) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f32_06 +; CHECK-NEXT: ret float -3.000000e+00 +define float @constant_fold_round_f32_06() #0 { + %x = call float @llvm.round.f32(float -2.75) #0 + ret float %x +} + +; CHECK-LABEL: @constant_fold_round_f64_01 +; CHECK-NEXT: ret double 1.000000e+00 +define double @constant_fold_round_f64_01() #0 { + %x = call double @round(double 1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_02 +; CHECK-NEXT: ret double -1.000000e+00 +define double @constant_fold_round_f64_02() #0 { + %x = call double @llvm.round.f64(double -1.3) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_03 +; CHECK-NEXT: ret double 2.000000e+00 +define double @constant_fold_round_f64_03() #0 { + %x = call double @round(double 1.5) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_04 +; CHECK-NEXT: ret double -2.000000e+00 +define double @constant_fold_round_f64_04() #0 { + %x = call double @llvm.round.f64(double -1.5) #0 + ret double %x +} + +; CHECK-LABEL: 
@constant_fold_round_f64_05 +; CHECK-NEXT: ret double 3.000000e+00 +define double @constant_fold_round_f64_05() #0 { + %x = call double @round(double 2.7) #0 + ret double %x +} + +; CHECK-LABEL: @constant_fold_round_f64_06 +; CHECK-NEXT: ret double -3.000000e+00 +define double @constant_fold_round_f64_06() #0 { + %x = call double @llvm.round.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Analysis/ConstantFolding/shufflevector.ll b/llvm/test/Analysis/ConstantFolding/shufflevector.ll new file mode 100644 index 000000000..d69c2caec --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/shufflevector.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -constprop -S | FileCheck %s + +define <vscale x 4 x i32> @shufflevector_scalable_constant() { +; CHECK-LABEL: @shufflevector_scalable_constant( +; CHECK-NEXT: ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 1, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer) +; + %i = insertelement <vscale x 4 x i32> undef, i32 1, i32 0 + %i2 = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer + ret <vscale x 4 x i32> %i2 +} diff --git a/llvm/test/Analysis/ConstantFolding/trunc.ll b/llvm/test/Analysis/ConstantFolding/trunc.ll new file mode 100644 index 000000000..df1380917 --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/trunc.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse < %s | FileCheck %s + +declare float @truncf(float) #0 +declare float @llvm.trunc.f32(float) #0 +declare double @trunc(double) #0 +declare double @llvm.trunc.f64(double) #0 + +define float @constant_fold_trunc_f32_01() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_01( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @truncf(float 1.25) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_02() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_02( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.trunc.f32(float -1.25) #0 + ret 
float %x +} + +define float @constant_fold_trunc_f32_03() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_03( +; CHECK-NEXT: ret float 1.000000e+00 +; + %x = call float @truncf(float 1.5) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_04() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_04( +; CHECK-NEXT: ret float -1.000000e+00 +; + %x = call float @llvm.trunc.f32(float -1.5) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_05() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_05( +; CHECK-NEXT: ret float 2.000000e+00 +; + %x = call float @truncf(float 2.75) #0 + ret float %x +} + +define float @constant_fold_trunc_f32_06() #0 { +; CHECK-LABEL: @constant_fold_trunc_f32_06( +; CHECK-NEXT: ret float -2.000000e+00 +; + %x = call float @llvm.trunc.f32(float -2.75) #0 + ret float %x +} + +define double @constant_fold_trunc_f64_01() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_01( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @trunc(double 1.3) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_02() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_02( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.trunc.f64(double -1.3) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_03() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_03( +; CHECK-NEXT: ret double 1.000000e+00 +; + %x = call double @trunc(double 1.5) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_04() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_04( +; CHECK-NEXT: ret double -1.000000e+00 +; + %x = call double @llvm.trunc.f64(double -1.5) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_05() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_05( +; CHECK-NEXT: ret double 2.000000e+00 +; + %x = call double @trunc(double 2.7) #0 + ret double %x +} + +define double @constant_fold_trunc_f64_06() #0 { +; CHECK-LABEL: @constant_fold_trunc_f64_06( +; CHECK-NEXT: ret double -2.000000e+00 +; + %x = call double 
@llvm.trunc.f64(double -2.7) #0 + ret double %x +} + +attributes #0 = { nounwind readnone } diff --git a/llvm/test/Analysis/CostModel/AArch64/aggregates.ll b/llvm/test/Analysis/CostModel/AArch64/aggregates.ll new file mode 100644 index 000000000..35d232b3b --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/aggregates.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s --check-prefixes=ALL,THROUGHPUT +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=latency -analyze | FileCheck %s --check-prefixes=ALL,LATENCY +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=code-size -analyze | FileCheck %s --check-prefixes=ALL,CODESIZE + +define i32 @extract_first_i32({i32, i32} %agg) { +; THROUGHPUT-LABEL: 'extract_first_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; LATENCY-LABEL: 'extract_first_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_first_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 0 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; + %r = extractvalue {i32, i32} %agg, 0 + ret i32 %r +} + +define i32 @extract_second_i32({i32, i32} %agg) { +; THROUGHPUT-LABEL: 'extract_second_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; 
LATENCY-LABEL: 'extract_second_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_second_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i32 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; + %r = extractvalue {i32, i32} %agg, 1 + ret i32 %r +} + +define i32 @extract_i32({i32, i1} %agg) { +; THROUGHPUT-LABEL: 'extract_i32' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; +; LATENCY-LABEL: 'extract_i32' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; +; CODESIZE-LABEL: 'extract_i32' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 0 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r +; + %r = extractvalue {i32, i1} %agg, 0 + ret i32 %r +} + +define i1 @extract_i1({i32, i1} %agg) { +; THROUGHPUT-LABEL: 'extract_i1' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %r +; +; LATENCY-LABEL: 'extract_i1' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, i1 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i1 %r +; +; CODESIZE-LABEL: 'extract_i1' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { 
i32, i1 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i1 %r +; + %r = extractvalue {i32, i1} %agg, 1 + ret i1 %r +} + +define float @extract_float({i32, float} %agg) { +; THROUGHPUT-LABEL: 'extract_float' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; +; LATENCY-LABEL: 'extract_float' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %r +; +; CODESIZE-LABEL: 'extract_float' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, float } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float %r +; + %r = extractvalue {i32, float} %agg, 1 + ret float %r +} + +define [42 x i42] @extract_array({i32, [42 x i42]} %agg) { +; THROUGHPUT-LABEL: 'extract_array' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret [42 x i42] %r +; +; LATENCY-LABEL: 'extract_array' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret [42 x i42] %r +; +; CODESIZE-LABEL: 'extract_array' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, [42 x i42] } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret [42 x i42] %r +; + %r = extractvalue {i32, [42 x i42]} %agg, 1 + ret [42 x i42] %r +} + +define <42 x i42> @extract_vector({i32, <42 x i42>} %agg) { +; THROUGHPUT-LABEL: 
'extract_vector' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <42 x i42> %r +; +; LATENCY-LABEL: 'extract_vector' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <42 x i42> %r +; +; CODESIZE-LABEL: 'extract_vector' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, <42 x i42> } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <42 x i42> %r +; + %r = extractvalue {i32, <42 x i42>} %agg, 1 + ret <42 x i42> %r +} + +%T1 = type { i32, float, <4 x i1> } + +define %T1 @extract_struct({i32, %T1} %agg) { +; THROUGHPUT-LABEL: 'extract_struct' +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret %T1 %r +; +; LATENCY-LABEL: 'extract_struct' +; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %T1 %r +; +; CODESIZE-LABEL: 'extract_struct' +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r = extractvalue { i32, %T1 } %agg, 1 +; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret %T1 %r +; + %r = extractvalue {i32, %T1} %agg, 1 + ret %T1 %r +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll index b538b12f0..9a2c01058 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll @@ -1,8 +1,11 @@ -; RUN: opt -cost-model -analyze 
-mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST16,ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOW16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOW16,ALL %s -; CHECK: 'add_i32' -; CHECK: estimated cost of 1 for {{.*}} add i32 + +; ALL: 'add_i32' +; ALL: estimated cost of 1 for {{.*}} add i32 define amdgpu_kernel void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %add = add i32 %vec, %b @@ -10,8 +13,8 @@ define amdgpu_kernel void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %va ret void } -; CHECK: 'add_v2i32' -; CHECK: estimated cost of 2 for {{.*}} add <2 x i32> +; ALL: 'add_v2i32' +; ALL: estimated cost of 2 for {{.*}} add <2 x i32> define amdgpu_kernel void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 { %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr %add = add <2 x i32> %vec, %b @@ -19,10 +22,10 @@ define amdgpu_kernel void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> add ret void } -; CHECK: 'add_v3i32' +; ALL: 'add_v3i32' ; Allow for 4 when v3i32 is illegal and TargetLowering thinks it needs widening, ; and 3 when it is legal. 
-; CHECK: estimated cost of {{[34]}} for {{.*}} add <3 x i32> +; ALL: estimated cost of {{[34]}} for {{.*}} add <3 x i32> define amdgpu_kernel void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 { %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr %add = add <3 x i32> %vec, %b @@ -30,8 +33,8 @@ define amdgpu_kernel void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> add ret void } -; CHECK: 'add_v4i32' -; CHECK: estimated cost of 4 for {{.*}} add <4 x i32> +; ALL: 'add_v4i32' +; ALL: estimated cost of 4 for {{.*}} add <4 x i32> define amdgpu_kernel void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 { %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr %add = add <4 x i32> %vec, %b @@ -39,10 +42,10 @@ define amdgpu_kernel void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> add ret void } -; CHECK: 'add_v5i32' +; ALL: 'add_v5i32' ; Allow for 8 when v3i32 is illegal and TargetLowering thinks it needs widening, ; and 5 when it is legal. 
-; CHECK: estimated cost of {{[58]}} for {{.*}} add <5 x i32> +; ALL: estimated cost of {{[58]}} for {{.*}} add <5 x i32> define amdgpu_kernel void @add_v5i32(<5 x i32> addrspace(1)* %out, <5 x i32> addrspace(1)* %vaddr, <5 x i32> %b) #0 { %vec = load <5 x i32>, <5 x i32> addrspace(1)* %vaddr %add = add <5 x i32> %vec, %b @@ -50,8 +53,8 @@ define amdgpu_kernel void @add_v5i32(<5 x i32> addrspace(1)* %out, <5 x i32> add ret void } -; CHECK: 'add_i64' -; CHECK: estimated cost of 2 for {{.*}} add i64 +; ALL: 'add_i64' +; ALL: estimated cost of 2 for {{.*}} add i64 define amdgpu_kernel void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %add = add i64 %vec, %b @@ -59,8 +62,8 @@ define amdgpu_kernel void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %va ret void } -; CHECK: 'add_v2i64' -; CHECK: estimated cost of 4 for {{.*}} add <2 x i64> +; ALL: 'add_v2i64' +; ALL: estimated cost of 4 for {{.*}} add <2 x i64> define amdgpu_kernel void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 { %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr %add = add <2 x i64> %vec, %b @@ -68,8 +71,8 @@ define amdgpu_kernel void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> add ret void } -; CHECK: 'add_v3i64' -; CHECK: estimated cost of 6 for {{.*}} add <3 x i64> +; ALL: 'add_v3i64' +; ALL: estimated cost of 6 for {{.*}} add <3 x i64> define amdgpu_kernel void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 { %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr %add = add <3 x i64> %vec, %b @@ -77,8 +80,8 @@ define amdgpu_kernel void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> add ret void } -; CHECK: 'add_v4i64' -; CHECK: estimated cost of 8 for {{.*}} add <4 x i64> +; ALL: 'add_v4i64' +; ALL: estimated cost of 8 for {{.*}} add <4 x i64> define amdgpu_kernel void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> 
addrspace(1)* %vaddr, <4 x i64> %b) #0 { %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr %add = add <4 x i64> %vec, %b @@ -86,8 +89,8 @@ define amdgpu_kernel void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add ret void } -; CHECK: 'add_v16i64' -; CHECK: estimated cost of 32 for {{.*}} add <16 x i64> +; ALL: 'add_v16i64' +; ALL: estimated cost of 32 for {{.*}} add <16 x i64> define amdgpu_kernel void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 { %vec = load <16 x i64>, <16 x i64> addrspace(1)* %vaddr %add = add <16 x i64> %vec, %b @@ -95,8 +98,8 @@ define amdgpu_kernel void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> ret void } -; CHECK: 'add_i16' -; CHECK: estimated cost of 1 for {{.*}} add i16 +; ALL: 'add_i16' +; ALL: estimated cost of 1 for {{.*}} add i16 define amdgpu_kernel void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 { %vec = load i16, i16 addrspace(1)* %vaddr %add = add i16 %vec, %b @@ -104,8 +107,9 @@ define amdgpu_kernel void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %va ret void } -; CHECK: 'add_v2i16' -; CHECK: estimated cost of 2 for {{.*}} add <2 x i16> +; ALL: 'add_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} add <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} add <2 x i16> define amdgpu_kernel void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %add = add <2 x i16> %vec, %b @@ -113,8 +117,8 @@ define amdgpu_kernel void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> add ret void } -; CHECK: 'sub_i32' -; CHECK: estimated cost of 1 for {{.*}} sub i32 +; ALL: 'sub_i32' +; ALL: estimated cost of 1 for {{.*}} sub i32 define amdgpu_kernel void @sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %sub = sub i32 %vec, %b @@ -122,16 +126,16 @@ define amdgpu_kernel void 
@sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %va ret void } -; CHECK: 'sub_i64' -; CHECK: estimated cost of 2 for {{.*}} sub i64 +; ALL: 'sub_i64' +; ALL: estimated cost of 2 for {{.*}} sub i64 define amdgpu_kernel void @sub_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %sub = sub i64 %vec, %b store i64 %sub, i64 addrspace(1)* %out ret void } -; CHECK: 'sub_i16' -; CHECK: estimated cost of 1 for {{.*}} sub i16 +; ALL: 'sub_i16' +; ALL: estimated cost of 1 for {{.*}} sub i16 define amdgpu_kernel void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 { %vec = load i16, i16 addrspace(1)* %vaddr %sub = sub i16 %vec, %b @@ -139,8 +143,9 @@ define amdgpu_kernel void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %va ret void } -; CHECK: 'sub_v2i16' -; CHECK: estimated cost of 2 for {{.*}} sub <2 x i16> +; ALL: 'sub_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} sub <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} sub <2 x i16> define amdgpu_kernel void @sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %sub = sub <2 x i16> %vec, %b diff --git a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll index f15ab500a..a87a965c6 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll @@ -1,4 +1,5 @@ ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s ; CHECK-LABEL: 'addrspacecast_global_to_flat' ; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8* diff --git a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll index 
aa70f5032..2dec5f350 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/bit-ops.ll @@ -1,7 +1,10 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW16 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST16 %s -; CHECK: 'or_i32' -; CHECK: estimated cost of 1 for {{.*}} or i32 +; ALL: 'or_i32' +; ALL: estimated cost of 1 for {{.*}} or i32 define amdgpu_kernel void @or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %or = or i32 %vec, %b @@ -9,8 +12,8 @@ define amdgpu_kernel void @or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vad ret void } -; CHECK: 'or_i64' -; CHECK: estimated cost of 2 for {{.*}} or i64 +; ALL: 'or_i64' +; ALL: estimated cost of 2 for {{.*}} or i64 define amdgpu_kernel void @or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %or = or i64 %vec, %b @@ -18,8 +21,18 @@ define amdgpu_kernel void @or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vad ret void } -; CHECK: 'xor_i32' -; CHECK: estimated cost of 1 for {{.*}} xor i32 +; ALL: 'or_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} or <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} or <2 x i16> +define amdgpu_kernel void @or_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %or = or <2 x i16> %vec, %b + store <2 x i16> %or, <2 x i16> addrspace(1)* 
%out + ret void +} + +; ALL: 'xor_i32' +; ALL: estimated cost of 1 for {{.*}} xor i32 define amdgpu_kernel void @xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %or = xor i32 %vec, %b @@ -27,8 +40,8 @@ define amdgpu_kernel void @xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %va ret void } -; CHECK: 'xor_i64' -; CHECK: estimated cost of 2 for {{.*}} xor i64 +; ALL: 'xor_i64' +; ALL: estimated cost of 2 for {{.*}} xor i64 define amdgpu_kernel void @xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %or = xor i64 %vec, %b @@ -36,9 +49,18 @@ define amdgpu_kernel void @xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %va ret void } +; ALL: 'xor_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} xor <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} xor <2 x i16> +define amdgpu_kernel void @xor_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %xor = xor <2 x i16> %vec, %b + store <2 x i16> %xor, <2 x i16> addrspace(1)* %out + ret void +} -; CHECK: 'and_i32' -; CHECK: estimated cost of 1 for {{.*}} and i32 +; ALL: 'and_i32' +; ALL: estimated cost of 1 for {{.*}} and i32 define amdgpu_kernel void @and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 { %vec = load i32, i32 addrspace(1)* %vaddr %or = and i32 %vec, %b @@ -46,8 +68,8 @@ define amdgpu_kernel void @and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %va ret void } -; CHECK: 'and_i64' -; CHECK: estimated cost of 2 for {{.*}} and i64 +; ALL: 'and_i64' +; ALL: estimated cost of 2 for {{.*}} and i64 define amdgpu_kernel void @and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 { %vec = load i64, i64 addrspace(1)* %vaddr %or = and i64 %vec, %b @@ -55,5 +77,14 @@ define amdgpu_kernel void @and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %va ret void } +; ALL: 
'and_v2i16' +; SLOW16: estimated cost of 2 for {{.*}} and <2 x i16> +; FAST16: estimated cost of 1 for {{.*}} and <2 x i16> +define amdgpu_kernel void @and_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 { + %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr + %and = and <2 x i16> %vec, %b + store <2 x i16> %and, <2 x i16> addrspace(1)* %out + ret void +} attributes #0 = { nounwind } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll index 889cc8bb8..67ce8ffba 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/extractelement.ll @@ -1,6 +1,10 @@ ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,VI %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa %s | FileCheck -check-prefixes=GCN,CI %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji %s | FileCheck -check-prefixes=GCN,GFX89 %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 %s | FileCheck -check-prefixes=GCN,GFX89 %s + ; GCN: 'extractelement_v2i32' ; GCN: estimated cost of 0 for {{.*}} extractelement <2 x i32> @@ -113,8 +117,7 @@ define amdgpu_kernel void @extractelement_v4i8(i8 addrspace(1)* %out, <4 x i8> a ; GCN: 'extractelement_0_v2i16': ; CI: estimated cost of 1 for {{.*}} extractelement <2 x i16> 
%vec, i16 0 -; VI: estimated cost of 0 for {{.*}} extractelement <2 x i16> -; GFX9: estimated cost of 0 for {{.*}} extractelement <2 x i16> +; GFX89: estimated cost of 0 for {{.*}} extractelement <2 x i16> define amdgpu_kernel void @extractelement_0_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr) { %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr %elt = extractelement <2 x i16> %vec, i16 0 diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll index 51e65fe91..de5381c21 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fabs.ll @@ -1,6 +1,7 @@ ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s -; CHECK: 'fabs_f32' +; CHECK-LABEL: 'fabs_f32' ; CHECK: estimated cost of 0 for {{.*}} call float @llvm.fabs.f32 define amdgpu_kernel void @fabs_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 { %vec = load float, float addrspace(1)* %vaddr @@ -9,7 +10,7 @@ define amdgpu_kernel void @fabs_f32(float addrspace(1)* %out, float addrspace(1) ret void } -; CHECK: 'fabs_v2f32' +; CHECK-LABEL: 'fabs_v2f32' ; CHECK: estimated cost of 0 for {{.*}} call <2 x float> @llvm.fabs.v2f32 define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 { %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr @@ -18,7 +19,7 @@ define amdgpu_kernel void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float ret void } -; CHECK: 'fabs_v3f32' +; CHECK-LABEL: 'fabs_v3f32' ; CHECK: estimated cost of 0 for {{.*}} call <3 x float> @llvm.fabs.v3f32 define amdgpu_kernel void @fabs_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr) #0 { %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr @@ -27,7 +28,7 @@ define amdgpu_kernel void @fabs_v3f32(<3 x float> addrspace(1)* 
%out, <3 x float ret void } -; CHECK: 'fabs_v5f32' +; CHECK-LABEL: 'fabs_v5f32' ; CHECK: estimated cost of 0 for {{.*}} call <5 x float> @llvm.fabs.v5f32 define amdgpu_kernel void @fabs_v5f32(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr) #0 { %vec = load <5 x float>, <5 x float> addrspace(1)* %vaddr @@ -36,7 +37,7 @@ define amdgpu_kernel void @fabs_v5f32(<5 x float> addrspace(1)* %out, <5 x float ret void } -; CHECK: 'fabs_f64' +; CHECK-LABEL: 'fabs_f64' ; CHECK: estimated cost of 0 for {{.*}} call double @llvm.fabs.f64 define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr) #0 { %vec = load double, double addrspace(1)* %vaddr @@ -45,7 +46,7 @@ define amdgpu_kernel void @fabs_f64(double addrspace(1)* %out, double addrspace( ret void } -; CHECK: 'fabs_v2f64' +; CHECK-LABEL: 'fabs_v2f64' ; CHECK: estimated cost of 0 for {{.*}} call <2 x double> @llvm.fabs.v2f64 define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr) #0 { %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr @@ -54,7 +55,7 @@ define amdgpu_kernel void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ret void } -; CHECK: 'fabs_v3f64' +; CHECK-LABEL: 'fabs_v3f64' ; CHECK: estimated cost of 0 for {{.*}} call <3 x double> @llvm.fabs.v3f64 define amdgpu_kernel void @fabs_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr) #0 { %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr @@ -63,7 +64,7 @@ define amdgpu_kernel void @fabs_v3f64(<3 x double> addrspace(1)* %out, <3 x doub ret void } -; CHECK: 'fabs_f16' +; CHECK-LABEL: 'fabs_f16' ; CHECK: estimated cost of 0 for {{.*}} call half @llvm.fabs.f16 define amdgpu_kernel void @fabs_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 { %vec = load half, half addrspace(1)* %vaddr @@ -72,7 +73,7 @@ define amdgpu_kernel void @fabs_f16(half addrspace(1)* %out, half addrspace(1)* ret void } -; CHECK: 'fabs_v2f16' 
+; CHECK-LABEL: 'fabs_v2f16' ; CHECK: estimated cost of 0 for {{.*}} call <2 x half> @llvm.fabs.v2f16 define amdgpu_kernel void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr @@ -81,7 +82,7 @@ define amdgpu_kernel void @fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> ret void } -; CHECK: 'fabs_v3f16' +; CHECK-LABEL: 'fabs_v3f16' ; CHECK: estimated cost of 0 for {{.*}} call <3 x half> @llvm.fabs.v3f16 define amdgpu_kernel void @fabs_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr) #0 { %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll index 5a5a09419..1203182a8 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll @@ -1,5 +1,7 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=FASTF64 -check-prefix=ALL %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefix=SLOWF64 -check-prefix=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF64,FASTF16,ALL %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SLOWF64,SLOWF16,ALL %s ; ALL: 'fadd_f32' ; ALL: estimated cost of 1 for {{.*}} fadd float @@ -71,8 +73,8 @@ define amdgpu_kernel void @fadd_v3f64(<3 x double> 
addrspace(1)* %out, <3 x doub ret void } -; ALL 'fadd_f16' -; ALL estimated cost of 1 for {{.*}} fadd half +; ALL: 'fadd_f16' +; ALL: estimated cost of 1 for {{.*}} fadd half define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 { %vec = load half, half addrspace(1)* %vaddr %add = fadd half %vec, %b @@ -80,8 +82,9 @@ define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half addrspace(1)* ret void } -; ALL 'fadd_v2f16' -; ALL estimated cost of 2 for {{.*}} fadd <2 x half> +; ALL: 'fadd_v2f16' +; SLOWF16: estimated cost of 2 for {{.*}} fadd <2 x half> +; FASTF16: estimated cost of 1 for {{.*}} fadd <2 x half> define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr %add = fadd <2 x half> %vec, %b @@ -89,8 +92,19 @@ define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> ret void } -; ALL 'fadd_v4f16' -; ALL estimated cost of 4 for {{.*}} fadd <4 x half> +; ALL: 'fadd_v3f16' +; SLOWF16: estimated cost of 4 for {{.*}} fadd <3 x half> +; FASTF16: estimated cost of 2 for {{.*}} fadd <3 x half> +define amdgpu_kernel void @fadd_v3f16(<3 x half> addrspace(1)* %out, <3 x half> addrspace(1)* %vaddr, <3 x half> %b) #0 { + %vec = load <3 x half>, <3 x half> addrspace(1)* %vaddr + %add = fadd <3 x half> %vec, %b + store <3 x half> %add, <3 x half> addrspace(1)* %out + ret void +} + +; ALL: 'fadd_v4f16' +; SLOWF16: estimated cost of 4 for {{.*}} fadd <4 x half> +; FASTF16: estimated cost of 2 for {{.*}} fadd <4 x half> define amdgpu_kernel void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 { %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr %add = fadd <4 x half> %vec, %b diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll index 325960ac9..6986a3158 100644 --- 
a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -1,48 +1,91 @@ -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,SLOWFP32DENORMS,NOFP16,NOFP16-FP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,FASTFP32DENORMS,FP16 %s - -; ALL: 'fdiv_f32' -; NOFP32DENORM: estimated cost of 12 for {{.*}} fdiv float -; FP32DENORMS: estimated cost of 10 for {{.*}} fdiv float -define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 { +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP16,NOFP16-NOFP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP16,NOFP16-NOFP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck 
-check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,NOFP16,NOFP16-FP32DENORM,SLOWFP32DENORMS %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FASTFP32DENORMS,FP16 %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=ALL,SLOWFP32DENORMS,NOFP16,NOFP16-FP32DENORM %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FASTFP32DENORMS,FP16 %s + +; ALL: 'fdiv_f32_ieee' +; ALL: estimated cost of 10 for {{.*}} fdiv float +define amdgpu_kernel void @fdiv_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 { + %vec = load float, float addrspace(1)* %vaddr + %add = fdiv float %vec, %b + store float %add, float addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_f32_ftzdaz' +; ALL: estimated cost of 12 for {{.*}} fdiv float 
+define amdgpu_kernel void @fdiv_f32_ftzdaz(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #1 { %vec = load float, float addrspace(1)* %vaddr %add = fdiv float %vec, %b store float %add, float addrspace(1)* %out ret void } -; ALL: 'fdiv_v2f32' -; NOFP32DENORM: estimated cost of 24 for {{.*}} fdiv <2 x float> -; FP32DENORMS: estimated cost of 20 for {{.*}} fdiv <2 x float> -define amdgpu_kernel void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 { +; ALL: 'fdiv_v2f32_ieee' +; ALL: estimated cost of 20 for {{.*}} fdiv <2 x float> +define amdgpu_kernel void @fdiv_v2f32_ieee(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 { + %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr + %add = fdiv <2 x float> %vec, %b + store <2 x float> %add, <2 x float> addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v2f32_ftzdaz' +; ALL: estimated cost of 24 for {{.*}} fdiv <2 x float> +define amdgpu_kernel void @fdiv_v2f32_ftzdaz(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #1 { %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr %add = fdiv <2 x float> %vec, %b store <2 x float> %add, <2 x float> addrspace(1)* %out ret void } -; ALL: 'fdiv_v3f32' +; ALL: 'fdiv_v3f32_ieee' ; Allow for 48/40 when v3f32 is illegal and TargetLowering thinks it needs widening, ; and 36/30 when it is legal. 
-; NOFP32DENORM: estimated cost of {{36|48}} for {{.*}} fdiv <3 x float> -; FP32DENORMS: estimated cost of {{30|40}} for {{.*}} fdiv <3 x float> -define amdgpu_kernel void @fdiv_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 { +; ALL: estimated cost of {{30|40}} for {{.*}} fdiv <3 x float> +define amdgpu_kernel void @fdiv_v3f32_ieee(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 { %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr %add = fdiv <3 x float> %vec, %b store <3 x float> %add, <3 x float> addrspace(1)* %out ret void } -; ALL: 'fdiv_v5f32' +; ALL: 'fdiv_v3f32_ftzdaz' +; Allow for 48/40 when v3f32 is illegal and TargetLowering thinks it needs widening, +; and 36/30 when it is legal. +; ALL: estimated cost of {{36|48}} for {{.*}} fdiv <3 x float> +define amdgpu_kernel void @fdiv_v3f32_ftzdaz(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #1 { + %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr + %add = fdiv <3 x float> %vec, %b + store <3 x float> %add, <3 x float> addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v5f32_ieee' +; Allow for 96/80 when v5f32 is illegal and TargetLowering thinks it needs widening, +; and 60/50 when it is legal. +; ALL: estimated cost of {{80|50}} for {{.*}} fdiv <5 x float> +define amdgpu_kernel void @fdiv_v5f32_ieee(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr, <5 x float> %b) #0 { + %vec = load <5 x float>, <5 x float> addrspace(1)* %vaddr + %add = fdiv <5 x float> %vec, %b + store <5 x float> %add, <5 x float> addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v5f32_ftzdaz' ; Allow for 96/80 when v5f32 is illegal and TargetLowering thinks it needs widening, ; and 60/50 when it is legal. 
-; NOFP32DENORM: estimated cost of {{96|60}} for {{.*}} fdiv <5 x float> -; FP32DENORMS: estimated cost of {{80|50}} for {{.*}} fdiv <5 x float> -define amdgpu_kernel void @fdiv_v5f32(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr, <5 x float> %b) #0 { +; ALL: estimated cost of {{96|60}} for {{.*}} fdiv <5 x float> +define amdgpu_kernel void @fdiv_v5f32_ftzdaz(<5 x float> addrspace(1)* %out, <5 x float> addrspace(1)* %vaddr, <5 x float> %b) #1 { %vec = load <5 x float>, <5 x float> addrspace(1)* %vaddr %add = fdiv <5 x float> %vec, %b store <5 x float> %add, <5 x float> addrspace(1)* %out @@ -85,55 +128,99 @@ define amdgpu_kernel void @fdiv_v3f64(<3 x double> addrspace(1)* %out, <3 x doub ret void } -; ALL: 'fdiv_f16' -; NOFP16-NOFP32DENORM: estimated cost of 12 for {{.*}} fdiv half -; NOFP16-FP32DENORM: estimated cost of 10 for {{.*}} fdiv half +; ALL: 'fdiv_f16_f32_ieee' +; NOFP16: estimated cost of 10 for {{.*}} fdiv half ; FP16: estimated cost of 10 for {{.*}} fdiv half -define amdgpu_kernel void @fdiv_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 { +define amdgpu_kernel void @fdiv_f16_f32_ieee(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 { %vec = load half, half addrspace(1)* %vaddr %add = fdiv half %vec, %b store half %add, half addrspace(1)* %out ret void } -; ALL: 'fdiv_v2f16' -; NOFP16-NOFP32DENORM: estimated cost of 24 for {{.*}} fdiv <2 x half> -; NOFP16-FP32DENORM: estimated cost of 20 for {{.*}} fdiv <2 x half> +; ALL: 'fdiv_f16_f32_ftzdaz' +; NOFP16: estimated cost of 12 for {{.*}} fdiv half +; FP16: estimated cost of 10 for {{.*}} fdiv half +define amdgpu_kernel void @fdiv_f16_f32_ftzdaz(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #1 { + %vec = load half, half addrspace(1)* %vaddr + %add = fdiv half %vec, %b + store half %add, half addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v2f16_f32_ieee' +; NOFP16: estimated cost of 20 for {{.*}} fdiv <2 x half> ; FP16: estimated cost 
of 20 for {{.*}} fdiv <2 x half> -define amdgpu_kernel void @fdiv_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 { +define amdgpu_kernel void @fdiv_v2f16_f32_ieee(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr %add = fdiv <2 x half> %vec, %b store <2 x half> %add, <2 x half> addrspace(1)* %out ret void } -; ALL: 'fdiv_v4f16' -; NOFP16-NOFP32DENORM: estimated cost of 48 for {{.*}} fdiv <4 x half> -; NOFP16-FP32DENORM: estimated cost of 40 for {{.*}} fdiv <4 x half> +; ALL: 'fdiv_v2f16_f32_ftzdaz' +; NOFP16: estimated cost of 24 for {{.*}} fdiv <2 x half> +; FP16: estimated cost of 20 for {{.*}} fdiv <2 x half> +define amdgpu_kernel void @fdiv_v2f16_f32_ftzdaz(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #1 { + %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr + %add = fdiv <2 x half> %vec, %b + store <2 x half> %add, <2 x half> addrspace(1)* %out + ret void +} + +; ALL: 'fdiv_v4f16_f32_ieee' +; NOFP16: estimated cost of 40 for {{.*}} fdiv <4 x half> ; FP16: estimated cost of 40 for {{.*}} fdiv <4 x half> -define amdgpu_kernel void @fdiv_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 { +define amdgpu_kernel void @fdiv_v4f16_f32_ieee(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 { %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr %add = fdiv <4 x half> %vec, %b store <4 x half> %add, <4 x half> addrspace(1)* %out ret void } -; ALL: 'rcp_f32' -; NOFP32DENORM: estimated cost of 3 for {{.*}} fdiv float +; ALL: 'fdiv_v4f16_f32_ftzdaz' +; NOFP16: estimated cost of 48 for {{.*}} fdiv <4 x half> +; FP16: estimated cost of 40 for {{.*}} fdiv <4 x half> +define amdgpu_kernel void @fdiv_v4f16_f32_ftzdaz(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #1 { + %vec = load <4 x half>, <4 x half> 
addrspace(1)* %vaddr + %add = fdiv <4 x half> %vec, %b + store <4 x half> %add, <4 x half> addrspace(1)* %out + ret void +} + +; ALL: 'rcp_f32_ieee' ; SLOWFP32DENORMS: estimated cost of 10 for {{.*}} fdiv float ; FASTFP32DENORMS: estimated cost of 10 for {{.*}} fdiv float -define amdgpu_kernel void @rcp_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 { +define amdgpu_kernel void @rcp_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 { %vec = load float, float addrspace(1)* %vaddr %add = fdiv float 1.0, %vec store float %add, float addrspace(1)* %out ret void } -; ALL: 'rcp_f16' -; NOFP16-NOFP32DENORM: estimated cost of 3 for {{.*}} fdiv half -; NOFP16-FP32DENORM: estimated cost of 10 for {{.*}} fdiv half +; ALL: 'rcp_f32_ftzdaz' +; ALL: estimated cost of 3 for {{.*}} fdiv float +define amdgpu_kernel void @rcp_f32_ftzdaz(float addrspace(1)* %out, float addrspace(1)* %vaddr) #1 { + %vec = load float, float addrspace(1)* %vaddr + %add = fdiv float 1.0, %vec + store float %add, float addrspace(1)* %out + ret void +} + +; ALL: 'rcp_f16_f32_ieee' +; NOFP16: estimated cost of 10 for {{.*}} fdiv half +; FP16: estimated cost of 3 for {{.*}} fdiv half +define amdgpu_kernel void @rcp_f16_f32_ieee(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 { + %vec = load half, half addrspace(1)* %vaddr + %add = fdiv half 1.0, %vec + store half %add, half addrspace(1)* %out + ret void +} + +; ALL: 'rcp_f16_f32_ftzdaz' +; NOFP16: estimated cost of 3 for {{.*}} fdiv half ; FP16: estimated cost of 3 for {{.*}} fdiv half -define amdgpu_kernel void @rcp_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 { +define amdgpu_kernel void @rcp_f16_f32_ftzdaz(half addrspace(1)* %out, half addrspace(1)* %vaddr) #1 { %vec = load half, half addrspace(1)* %vaddr %add = fdiv half 1.0, %vec store half %add, half addrspace(1)* %out @@ -152,26 +239,44 @@ define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double addrspace(1 ret void } -; ALL: 'rcp_v2f32' 
-; NOFP32DENORM: estimated cost of 6 for {{.*}} fdiv <2 x float> ; SLOWFP32DENORMS: estimated cost of 20 for {{.*}} fdiv <2 x float> ; FASTFP32DENORMS: estimated cost of 20 for {{.*}} fdiv <2 x float> -define amdgpu_kernel void @rcp_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 { +define amdgpu_kernel void @rcp_v2f32_ieee(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 { %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr %add = fdiv <2 x float> <float 1.0, float 1.0>, %vec store <2 x float> %add, <2 x float> addrspace(1)* %out ret void } -; ALL: 'rcp_v2f16' -; NOFP16-NOFP32DENORM: estimated cost of 6 for {{.*}} fdiv <2 x half> -; NOFP16-FP32DENORM: estimated cost of 20 for {{.*}} fdiv <2 x half> +; ALL: 'rcp_v2f32_ftzdaz' +; ALL: estimated cost of 6 for {{.*}} fdiv <2 x float> +define amdgpu_kernel void @rcp_v2f32_ftzdaz(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #1 { + %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr + %add = fdiv <2 x float> <float 1.0, float 1.0>, %vec + store <2 x float> %add, <2 x float> addrspace(1)* %out + ret void +} + +; ALL: 'rcp_v2f16_f32_ieee' +; NOFP16: estimated cost of 20 for {{.*}} fdiv <2 x half> +; FP16: estimated cost of 6 for {{.*}} fdiv <2 x half> +define amdgpu_kernel void @rcp_v2f16_f32_ieee(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 { + %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr + %add = fdiv <2 x half> <half 1.0, half 1.0>, %vec + store <2 x half> %add, <2 x half> addrspace(1)* %out + ret void +} + +; ALL: 'rcp_v2f16_f32_ftzdaz' +; NOFP16: estimated cost of 6 for {{.*}} fdiv <2 x half> ; FP16: estimated cost of 6 for {{.*}} fdiv <2 x half> -define amdgpu_kernel void @rcp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 { +define amdgpu_kernel void @rcp_v2f16_f32_ftzdaz(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #1 { %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr %add = fdiv <2 x half> <half 1.0, half 1.0>
, %vec store <2 x half> %add, <2 x half> addrspace(1)* %out ret void } -attributes #0 = { nounwind } +attributes #0 = { nounwind "target-features"="+fp32-denormals" } +attributes #1 = { nounwind "target-features"="-fp32-denormals" } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll.golden b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll.golden index e142649aa..900c9160a 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll.golden +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll.golden @@ -1,4 +1,4 @@ -define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 { +define amdgpu_kernel void @fdiv_f32_ieee(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 { ;