Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARM64-SVE: Add ShiftRightArithmeticForDivide #104279

Merged
merged 4 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 28 additions & 12 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
assert(instrIsRMW);

insScalableOpts sopt;
insScalableOpts sopt = INS_SCALABLE_OPTS_NONE;
bool hasShift = false;

switch (intrinEmbMask.id)
{
Expand All @@ -601,17 +602,34 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
assert(emitter::optGetSveInsOpt(op2Size) == INS_OPTS_SCALABLE_D);
sopt = INS_SCALABLE_OPTS_WIDE;
break;
}

FALLTHROUGH;
break;
}

case NI_Sve_ShiftRightArithmeticForDivide:
hasShift = true;
break;

default:
sopt = INS_SCALABLE_OPTS_NONE;
break;
}

auto emitInsHelper = [&](regNumber reg1, regNumber reg2, regNumber reg3) {
if (hasShift)
{
HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic());
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, helper.ImmValue(), opt,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the table-generated variant, can you verify with JitStress what the generated code looks like for the special cases below (where we use movprfx, etc.)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what I'm getting for the reflection scenario, without any JitStress:

G_M55611_IG01:  ;; offset=0x0000
            stp     fp, lr, [sp, #-0x10]!
            mov     fp, sp
                                                ;; size=8 bbWeight=1 PerfScore 1.50
G_M55611_IG02:  ;; offset=0x0008
            uxtb    w0, w0
            sub     w1, w0, #1
            cmp     w1, #8
            bhs     G_M55611_IG12
            ptrue   p0.b
            movprfx z0.b, p0/z, z0.b
            adr     x1, [G_M55611_IG03]
            add     x1, x1, x0,  LSL #3
            sub     x1, x1, #8
            br      x1
                                                ;; size=40 bbWeight=1 PerfScore 9.50
G_M55611_IG03:  ;; offset=0x0030
            asrd    z0.b, p0/m, z0.b, #1
            b       G_M55611_IG11
                                                ;; size=8 bbWeight=1 PerfScore 4.00
G_M55611_IG04:  ;; offset=0x0038
            asrd    z0.b, p0/m, z0.b, #2
            b       G_M55611_IG11
                                                ;; size=8 bbWeight=1 PerfScore 4.00
G_M55611_IG05:  ;; offset=0x0040
            asrd    z0.b, p0/m, z0.b, #3
            b       G_M55611_IG11
                                                ;; size=8 bbWeight=1 PerfScore 4.00
G_M55611_IG06:  ;; offset=0x0048
            asrd    z0.b, p0/m, z0.b, #4
            b       G_M55611_IG11
                                                ;; size=8 bbWeight=1 PerfScore 4.00
G_M55611_IG07:  ;; offset=0x0050
            asrd    z0.b, p0/m, z0.b, #5
            b       G_M55611_IG11
                                                ;; size=8 bbWeight=1 PerfScore 4.00
G_M55611_IG08:  ;; offset=0x0058
            asrd    z0.b, p0/m, z0.b, #6
            b       G_M55611_IG11
                                                ;; size=8 bbWeight=1 PerfScore 4.00
G_M55611_IG09:  ;; offset=0x0060
            asrd    z0.b, p0/m, z0.b, #7
            b       G_M55611_IG11
                                                ;; size=8 bbWeight=1 PerfScore 4.00
G_M55611_IG10:  ;; offset=0x0068
            asrd    z0.b, p0/m, z0.b, #8
                                                ;; size=4 bbWeight=1 PerfScore 3.00
G_M55611_IG11:  ;; offset=0x006C
            ldp     fp, lr, [sp], #0x10
            ret     lr
                                                ;; size=8 bbWeight=1 PerfScore 2.00
G_M55611_IG12:  ;; offset=0x0074
            bl      CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION
            brk_windows #0
                                                ;; size=8 bbWeight=0 PerfScore 0.00

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And with JitStress=2:

G_M55611_IG01:  ;; offset=0x0000
            stp     fp, lr, [sp, #-0x20]!
            mov     fp, sp
            movz    x1, #0x5678
            movk    x1, #0x1234 LSL #16
            movk    x1, #0xDEF0 LSL #32
            movk    x1, #0x9ABC LSL #48
            str     x1, [fp, #0x18]     // [V04 GsCookie]
                                                ;; size=28 bbWeight=1 PerfScore 4.50
G_M55611_IG02:  ;; offset=0x001C
            uxtb    w0, w0
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG03:  ;; offset=0x0020
            sub     w1, w0, #1
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG04:  ;; offset=0x0024
            cmp     w1, #8
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG05:  ;; offset=0x0028
            bhs     G_M55611_IG36
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG06:  ;; offset=0x002C
            ptrue   p0.b
                                                ;; size=4 bbWeight=1 PerfScore 2.00
G_M55611_IG07:  ;; offset=0x0030
            movprfx z0.b, p0/z, z0.b
                                                ;; size=4 bbWeight=1 PerfScore 2.00
G_M55611_IG08:  ;; offset=0x0034
            adr     x1, [G_M55611_IG12]
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG09:  ;; offset=0x0038
            add     x1, x1, x0,  LSL #3
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG10:  ;; offset=0x003C
            sub     x1, x1, #8
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG11:  ;; offset=0x0040
            br      x1
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG12:  ;; offset=0x0044
            asrd    z0.b, p0/m, z0.b, #1
                                                ;; size=4 bbWeight=1 PerfScore 3.00
G_M55611_IG13:  ;; offset=0x0048
            b       G_M55611_IG27
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG14:  ;; offset=0x004C
            asrd    z0.b, p0/m, z0.b, #2
                                                ;; size=4 bbWeight=1 PerfScore 3.00
G_M55611_IG15:  ;; offset=0x0050
            b       G_M55611_IG27
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG16:  ;; offset=0x0054
            asrd    z0.b, p0/m, z0.b, #3
                                                ;; size=4 bbWeight=1 PerfScore 3.00
G_M55611_IG17:  ;; offset=0x0058
            b       G_M55611_IG27
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG18:  ;; offset=0x005C
            asrd    z0.b, p0/m, z0.b, #4
                                                ;; size=4 bbWeight=1 PerfScore 3.00
G_M55611_IG19:  ;; offset=0x0060
            b       G_M55611_IG27
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG20:  ;; offset=0x0064
            asrd    z0.b, p0/m, z0.b, #5
                                                ;; size=4 bbWeight=1 PerfScore 3.00
G_M55611_IG21:  ;; offset=0x0068
            b       G_M55611_IG27
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG22:  ;; offset=0x006C
            asrd    z0.b, p0/m, z0.b, #6
                                                ;; size=4 bbWeight=1 PerfScore 3.00
G_M55611_IG23:  ;; offset=0x0070
            b       G_M55611_IG27
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG24:  ;; offset=0x0074
            asrd    z0.b, p0/m, z0.b, #7
                                                ;; size=4 bbWeight=1 PerfScore 3.00
G_M55611_IG25:  ;; offset=0x0078
            b       G_M55611_IG27
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG26:  ;; offset=0x007C
            asrd    z0.b, p0/m, z0.b, #8
                                                ;; size=4 bbWeight=1 PerfScore 3.00
G_M55611_IG27:  ;; offset=0x0080
            movz    xip0, #0x5678
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG28:  ;; offset=0x0084
            movk    xip0, #0x1234 LSL #16
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG29:  ;; offset=0x0088
            movk    xip0, #0xDEF0 LSL #32
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG30:  ;; offset=0x008C
            movk    xip0, #0x9ABC LSL #48
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG31:  ;; offset=0x0090
            ldr     xip1, [fp, #0x18]   // [V04 GsCookie]
                                                ;; size=4 bbWeight=1 PerfScore 2.00
G_M55611_IG32:  ;; offset=0x0094
            cmp     xip0, xip1
                                                ;; size=4 bbWeight=1 PerfScore 0.50
G_M55611_IG33:  ;; offset=0x0098
            beq     G_M55611_IG35
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG34:  ;; offset=0x009C
            bl      CORINFO_HELP_FAIL_FAST
                                                ;; size=4 bbWeight=1 PerfScore 1.00
G_M55611_IG35:  ;; offset=0x00A0
            ldp     fp, lr, [sp], #0x20
            ret     lr
                                                ;; size=8 bbWeight=1 PerfScore 2.00
G_M55611_IG36:  ;; offset=0x00A8
            bl      CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION
                                                ;; size=4 bbWeight=0 PerfScore 0.00
G_M55611_IG37:  ;; offset=0x00AC
            brk_windows #0
                                                ;; size=4 bbWeight=0 PerfScore 0.00

sopt);
}
}
else
{
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, reg1, reg2, reg3, opt, sopt);
}
};

if (intrin.op3->IsVectorZero())
{
// If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the
Expand All @@ -622,7 +640,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

// Finally, perform the actual "predicated" operation so that `targetReg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt, sopt);
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
}
else if (targetReg != falseReg)
{
Expand All @@ -636,8 +654,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
// If the embedded instruction supports optional mask operation, use the "unpredicated"
// version of the instruction, followed by "sel" to select the active lanes.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, embMaskOp1Reg,
embMaskOp2Reg, opt, sopt);
emitInsHelper(targetReg, embMaskOp1Reg, embMaskOp2Reg);
}
else
{
Expand All @@ -651,8 +668,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg);

GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
opt, sopt);
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
}

GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
Expand All @@ -669,13 +685,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

// Finally, perform the actual "predicated" operation so that `targetReg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt, sopt);
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
}
else
{
// Just perform the actual "predicated" operation so that `targetReg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, opt, sopt);
emitInsHelper(targetReg, maskReg, embMaskOp2Reg);
}

break;
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ HARDWARE_INTRINSIC(Sve, SaturatingIncrementByActiveElementCount,
HARDWARE_INTRINSIC(Sve, Scale, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fscale, INS_sve_fscale}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, false, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, false, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, false, {INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You also need to add handling in lookupImmBounds() in hwintrinsicarm64.cpp and in ContainCheckHWIntrinsic() in lowerarmarch.cpp.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a special case for this API to ContainCheckHWIntrinsic. I'm running into issues with lookupImmBounds, though: if we are passed a ConditionalSelect intrinsic, we need to be able to unwrap the inner intrinsic, but this method is only passed the name of the intrinsic. There are a few places where lookupImmBounds is called and we don't yet have rich enough intrinsic data to do this unwrapping (such as in the importer), though this probably happens before we wrap the intrinsic with ConditionalSelect, right? I did a quick audit of all the other places where we call lookupImmBounds; at those call sites, we are either doing the lookup for a specific intrinsic anyway, or we are taking care to look up the unwrapped intrinsic thanks to other changes in this PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kunalspathak do you think it's fine to leave lookupImmBounds as-is for now, since we're handling the embedded mask case in the caller? Or should I tweak lookupImmBounds to take the intrinsic's GenTree node so we can do the embedded mask check in the method itself?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes

HARDWARE_INTRINSIC(Sve, ShiftRightLogical, -1, -1, false, {INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_sve_lsr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
14 changes: 13 additions & 1 deletion src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3369,6 +3369,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
// Handle op2
if (op2->OperIsHWIntrinsic())
{
const GenTreeHWIntrinsic* embOp = op2->AsHWIntrinsic();

if (IsInvariantInRange(op2, node) && op2->isEmbeddedMaskingCompatibleHWIntrinsic())
{
uint32_t maskSize = genTypeSize(node->GetSimdBaseType());
Expand All @@ -3384,7 +3386,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
{
// Else check if this operation has an auxiliary type that matches the
// mask size.
GenTreeHWIntrinsic* embOp = op2->AsHWIntrinsic();

// For now, make sure that we get here only for intrinsics that we are
// sure about to rely on auxiliary type's size.
Expand All @@ -3401,6 +3402,17 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
}
}
}

// Handle intrinsics with embedded masks and immediate operands
// (For now, just handle ShiftRightArithmeticForDivide specifically)
if (embOp->GetHWIntrinsicId() == NI_Sve_ShiftRightArithmeticForDivide)
{
assert(embOp->GetOperandCount() == 2);
if (embOp->Op(2)->IsCnsIntOrI())
{
MakeSrcContained(op2, embOp->Op(2));
}
}
}

// Handle op3
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1906,6 +1906,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
else
{
assert((numArgs == 1) || (numArgs == 2) || (numArgs == 3));

// Special handling for ShiftRightArithmeticForDivide:
// We might need an additional register to hold branch targets into the switch table
// that encodes the immediate
if (intrinEmb.id == NI_Sve_ShiftRightArithmeticForDivide)
{
assert(embOp2Node->GetOperandCount() == 2);
if (!embOp2Node->Op(2)->isContainedIntOrIImmed())
{
buildInternalIntRegisterDefForNode(embOp2Node);
}
}

tgtPrefUse = BuildUse(embOp2Node->Op(1));
srcCount += 1;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5858,6 +5858,45 @@ internal Arm64() { }
public static unsafe Vector<sbyte> ShiftRightArithmetic(Vector<sbyte> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }


/// Arithmetic shift right for divide by immediate

/// <summary>
/// svint16_t svasrd[_n_s16]_m(svbool_t pg, svint16_t op1, uint64_t imm2)
/// ASRD Ztied1.H, Pg/M, Ztied1.H, #imm2
/// svint16_t svasrd[_n_s16]_x(svbool_t pg, svint16_t op1, uint64_t imm2)
/// ASRD Ztied1.H, Pg/M, Ztied1.H, #imm2
/// svint16_t svasrd[_n_s16]_z(svbool_t pg, svint16_t op1, uint64_t imm2)
/// </summary>
public static unsafe Vector<short> ShiftRightArithmeticForDivide(Vector<short> value, [ConstantExpected(Min = 1, Max = (byte)(16))] byte control) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svasrd[_n_s32]_m(svbool_t pg, svint32_t op1, uint64_t imm2)
/// ASRD Ztied1.S, Pg/M, Ztied1.S, #imm2
/// svint32_t svasrd[_n_s32]_x(svbool_t pg, svint32_t op1, uint64_t imm2)
/// ASRD Ztied1.S, Pg/M, Ztied1.S, #imm2
/// svint32_t svasrd[_n_s32]_z(svbool_t pg, svint32_t op1, uint64_t imm2)
/// </summary>
public static unsafe Vector<int> ShiftRightArithmeticForDivide(Vector<int> value, [ConstantExpected(Min = 1, Max = (byte)(32))] byte control) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svasrd[_n_s64]_m(svbool_t pg, svint64_t op1, uint64_t imm2)
/// ASRD Ztied1.D, Pg/M, Ztied1.D, #imm2
/// svint64_t svasrd[_n_s64]_x(svbool_t pg, svint64_t op1, uint64_t imm2)
/// ASRD Ztied1.D, Pg/M, Ztied1.D, #imm2
/// svint64_t svasrd[_n_s64]_z(svbool_t pg, svint64_t op1, uint64_t imm2)
/// </summary>
public static unsafe Vector<long> ShiftRightArithmeticForDivide(Vector<long> value, [ConstantExpected(Min = 1, Max = (byte)(64))] byte control) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svasrd[_n_s8]_m(svbool_t pg, svint8_t op1, uint64_t imm2)
/// ASRD Ztied1.B, Pg/M, Ztied1.B, #imm2
/// svint8_t svasrd[_n_s8]_x(svbool_t pg, svint8_t op1, uint64_t imm2)
/// ASRD Ztied1.B, Pg/M, Ztied1.B, #imm2
/// svint8_t svasrd[_n_s8]_z(svbool_t pg, svint8_t op1, uint64_t imm2)
/// </summary>
public static unsafe Vector<sbyte> ShiftRightArithmeticForDivide(Vector<sbyte> value, [ConstantExpected(Min = 1, Max = (byte)(8))] byte control) { throw new PlatformNotSupportedException(); }


/// Logical shift right

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5902,6 +5902,45 @@ internal Arm64() { }
public static unsafe Vector<sbyte> ShiftRightArithmetic(Vector<sbyte> left, Vector<ulong> right) => ShiftRightArithmetic(left, right);


/// Arithmetic shift right for divide by immediate

/// <summary>
/// svint16_t svasrd[_n_s16]_m(svbool_t pg, svint16_t op1, uint64_t imm2)
/// ASRD Ztied1.H, Pg/M, Ztied1.H, #imm2
/// svint16_t svasrd[_n_s16]_x(svbool_t pg, svint16_t op1, uint64_t imm2)
/// ASRD Ztied1.H, Pg/M, Ztied1.H, #imm2
/// svint16_t svasrd[_n_s16]_z(svbool_t pg, svint16_t op1, uint64_t imm2)
/// </summary>
public static unsafe Vector<short> ShiftRightArithmeticForDivide(Vector<short> value, [ConstantExpected(Min = 1, Max = (byte)(16))] byte control) => ShiftRightArithmeticForDivide(value, control);

/// <summary>
/// svint32_t svasrd[_n_s32]_m(svbool_t pg, svint32_t op1, uint64_t imm2)
/// ASRD Ztied1.S, Pg/M, Ztied1.S, #imm2
/// svint32_t svasrd[_n_s32]_x(svbool_t pg, svint32_t op1, uint64_t imm2)
/// ASRD Ztied1.S, Pg/M, Ztied1.S, #imm2
/// svint32_t svasrd[_n_s32]_z(svbool_t pg, svint32_t op1, uint64_t imm2)
/// </summary>
public static unsafe Vector<int> ShiftRightArithmeticForDivide(Vector<int> value, [ConstantExpected(Min = 1, Max = (byte)(32))] byte control) => ShiftRightArithmeticForDivide(value, control);

/// <summary>
/// svint64_t svasrd[_n_s64]_m(svbool_t pg, svint64_t op1, uint64_t imm2)
/// ASRD Ztied1.D, Pg/M, Ztied1.D, #imm2
/// svint64_t svasrd[_n_s64]_x(svbool_t pg, svint64_t op1, uint64_t imm2)
/// ASRD Ztied1.D, Pg/M, Ztied1.D, #imm2
/// svint64_t svasrd[_n_s64]_z(svbool_t pg, svint64_t op1, uint64_t imm2)
/// </summary>
public static unsafe Vector<long> ShiftRightArithmeticForDivide(Vector<long> value, [ConstantExpected(Min = 1, Max = (byte)(64))] byte control) => ShiftRightArithmeticForDivide(value, control);

/// <summary>
/// svint8_t svasrd[_n_s8]_m(svbool_t pg, svint8_t op1, uint64_t imm2)
/// ASRD Ztied1.B, Pg/M, Ztied1.B, #imm2
/// svint8_t svasrd[_n_s8]_x(svbool_t pg, svint8_t op1, uint64_t imm2)
/// ASRD Ztied1.B, Pg/M, Ztied1.B, #imm2
/// svint8_t svasrd[_n_s8]_z(svbool_t pg, svint8_t op1, uint64_t imm2)
/// </summary>
public static unsafe Vector<sbyte> ShiftRightArithmeticForDivide(Vector<sbyte> value, [ConstantExpected(Min = 1, Max = (byte)(8))] byte control) => ShiftRightArithmeticForDivide(value, control);


/// Logical shift right

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5036,6 +5036,10 @@ internal Arm64() { }
public static System.Numerics.Vector<long> ShiftRightArithmetic(System.Numerics.Vector<long> left, System.Numerics.Vector<ulong> right) { throw null; }
public static System.Numerics.Vector<sbyte> ShiftRightArithmetic(System.Numerics.Vector<sbyte> left, System.Numerics.Vector<byte> right) { throw null; }
public static System.Numerics.Vector<sbyte> ShiftRightArithmetic(System.Numerics.Vector<sbyte> left, System.Numerics.Vector<ulong> right) { throw null; }
public static System.Numerics.Vector<short> ShiftRightArithmeticForDivide(System.Numerics.Vector<short> value, [ConstantExpected(Min = 1, Max = (byte)(16))] byte control) { throw null; }
public static System.Numerics.Vector<int> ShiftRightArithmeticForDivide(System.Numerics.Vector<int> value, [ConstantExpected(Min = 1, Max = (byte)(32))] byte control) { throw null; }
public static System.Numerics.Vector<long> ShiftRightArithmeticForDivide(System.Numerics.Vector<long> value, [ConstantExpected(Min = 1, Max = (byte)(64))] byte control) { throw null; }
public static System.Numerics.Vector<sbyte> ShiftRightArithmeticForDivide(System.Numerics.Vector<sbyte> value, [ConstantExpected(Min = 1, Max = (byte)(8))] byte control) { throw null; }
public static System.Numerics.Vector<byte> ShiftRightLogical(System.Numerics.Vector<byte> left, System.Numerics.Vector<byte> right) { throw null; }
public static System.Numerics.Vector<byte> ShiftRightLogical(System.Numerics.Vector<byte> left, System.Numerics.Vector<ulong> right) { throw null; }
public static System.Numerics.Vector<ushort> ShiftRightLogical(System.Numerics.Vector<ushort> left, System.Numerics.Vector<ushort> right) { throw null; }
Expand Down
Loading
Loading