forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[LLVM][CodeGen][SVE] Implement nxvbf16 fpextend to nxvf32/nxvf64. (llvm#107253)
NOTE: There are no dedicated SVE instructions, but bf16->f32 is just a left shift because they share the same exponent range, and from there other convert instructions can be used.
- Loading branch information
1 parent
c2018fa
commit be1958f
Showing
3 changed files
with
117 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mattr=+sve < %s | FileCheck %s | ||
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s | ||
|
||
; Neither RUN line passes -mtriple, so the target is taken from this triple. | ||
target triple = "aarch64-unknown-linux-gnu" | ||
|
||
; fpext <vscale x 2 x bfloat> -> <vscale x 2 x float>. | ||
; Expected code is a single LSL #16 on the .s lanes of z0; no unpack or | ||
; convert instruction appears before the ret. | ||
define <vscale x 2 x float> @fpext_nxv2bf16_to_nxv2f32(<vscale x 2 x bfloat> %a) { | ||
; CHECK-LABEL: fpext_nxv2bf16_to_nxv2f32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: lsl z0.s, z0.s, #16 | ||
; CHECK-NEXT: ret | ||
%res = fpext <vscale x 2 x bfloat> %a to <vscale x 2 x float> | ||
ret <vscale x 2 x float> %res | ||
} |
|
||
; fpext <vscale x 4 x bfloat> -> <vscale x 4 x float>. | ||
; Expected code is a single LSL #16 on the .s lanes of z0; no unpack or | ||
; convert instruction appears before the ret. | ||
define <vscale x 4 x float> @fpext_nxv4bf16_to_nxv4f32(<vscale x 4 x bfloat> %a) { | ||
; CHECK-LABEL: fpext_nxv4bf16_to_nxv4f32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: lsl z0.s, z0.s, #16 | ||
; CHECK-NEXT: ret | ||
%res = fpext <vscale x 4 x bfloat> %a to <vscale x 4 x float> | ||
ret <vscale x 4 x float> %res | ||
} |
|
||
; fpext <vscale x 8 x bfloat> -> <vscale x 8 x float>. | ||
; The .h lanes of z0 are first split into two .s vectors with | ||
; UUNPKLO/UUNPKHI; each half is then shifted left by 16. The two result | ||
; vectors are produced in z0 and z1. | ||
define <vscale x 8 x float> @fpext_nxv8bf16_to_nxv8f32(<vscale x 8 x bfloat> %a) { | ||
; CHECK-LABEL: fpext_nxv8bf16_to_nxv8f32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: uunpklo z1.s, z0.h | ||
; CHECK-NEXT: uunpkhi z2.s, z0.h | ||
; CHECK-NEXT: lsl z0.s, z1.s, #16 | ||
; CHECK-NEXT: lsl z1.s, z2.s, #16 | ||
; CHECK-NEXT: ret | ||
%res = fpext <vscale x 8 x bfloat> %a to <vscale x 8 x float> | ||
ret <vscale x 8 x float> %res | ||
} |
|
||
; fpext <vscale x 2 x bfloat> -> <vscale x 2 x double>. | ||
; Done in two steps: LSL #16 on the .s lanes, then a predicated | ||
; FCVT from .s to .d governed by a PTRUE p0.d predicate. | ||
define <vscale x 2 x double> @fpext_nxv2bf16_to_nxv2f64(<vscale x 2 x bfloat> %a) { | ||
; CHECK-LABEL: fpext_nxv2bf16_to_nxv2f64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: lsl z0.s, z0.s, #16 | ||
; CHECK-NEXT: ptrue p0.d | ||
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s | ||
; CHECK-NEXT: ret | ||
%res = fpext <vscale x 2 x bfloat> %a to <vscale x 2 x double> | ||
ret <vscale x 2 x double> %res | ||
} |
|
||
; fpext <vscale x 4 x bfloat> -> <vscale x 4 x double>. | ||
; z0 is split into two .d vectors with UUNPKLO/UUNPKHI, each half is | ||
; shifted left by 16 on its .s lanes, and each is then converted with a | ||
; MOVPRFX + predicated FCVT (.s -> .d) pair. Results are produced in z0 and z1. | ||
define <vscale x 4 x double> @fpext_nxv4bf16_to_nxv4f64(<vscale x 4 x bfloat> %a) { | ||
; CHECK-LABEL: fpext_nxv4bf16_to_nxv4f64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: uunpklo z1.d, z0.s | ||
; CHECK-NEXT: uunpkhi z0.d, z0.s | ||
; CHECK-NEXT: ptrue p0.d | ||
; CHECK-NEXT: lsl z1.s, z1.s, #16 | ||
; CHECK-NEXT: lsl z2.s, z0.s, #16 | ||
; CHECK-NEXT: movprfx z0, z1 | ||
; CHECK-NEXT: fcvt z0.d, p0/m, z1.s | ||
; CHECK-NEXT: movprfx z1, z2 | ||
; CHECK-NEXT: fcvt z1.d, p0/m, z2.s | ||
; CHECK-NEXT: ret | ||
%res = fpext <vscale x 4 x bfloat> %a to <vscale x 4 x double> | ||
ret <vscale x 4 x double> %res | ||
} |
|
||
; fpext <vscale x 8 x bfloat> -> <vscale x 8 x double>. | ||
; Two levels of UUNPKLO/UUNPKHI (.h -> .s, then .s -> .d) split z0 into | ||
; four vectors. Each is shifted left by 16 on its .s lanes and converted | ||
; with a predicated FCVT (.s -> .d); results are produced in z0..z3. | ||
define <vscale x 8 x double> @fpext_nxv8bf16_to_nxv8f64(<vscale x 8 x bfloat> %a) { | ||
; CHECK-LABEL: fpext_nxv8bf16_to_nxv8f64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: uunpklo z1.s, z0.h | ||
; CHECK-NEXT: uunpkhi z0.s, z0.h | ||
; CHECK-NEXT: ptrue p0.d | ||
; CHECK-NEXT: uunpklo z2.d, z1.s | ||
; CHECK-NEXT: uunpkhi z1.d, z1.s | ||
; CHECK-NEXT: uunpklo z3.d, z0.s | ||
; CHECK-NEXT: uunpkhi z0.d, z0.s | ||
; CHECK-NEXT: lsl z1.s, z1.s, #16 | ||
; CHECK-NEXT: lsl z2.s, z2.s, #16 | ||
; CHECK-NEXT: lsl z3.s, z3.s, #16 | ||
; CHECK-NEXT: lsl z4.s, z0.s, #16 | ||
; CHECK-NEXT: fcvt z1.d, p0/m, z1.s | ||
; CHECK-NEXT: movprfx z0, z2 | ||
; CHECK-NEXT: fcvt z0.d, p0/m, z2.s | ||
; CHECK-NEXT: movprfx z2, z3 | ||
; CHECK-NEXT: fcvt z2.d, p0/m, z3.s | ||
; CHECK-NEXT: movprfx z3, z4 | ||
; CHECK-NEXT: fcvt z3.d, p0/m, z4.s | ||
; CHECK-NEXT: ret | ||
%res = fpext <vscale x 8 x bfloat> %a to <vscale x 8 x double> | ||
ret <vscale x 8 x double> %res | ||
} |