diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 78a9400e8383a..d0a76bc85bcef 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -776,6 +776,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableGFNI, W("EnableGFNI"), 1, "Allows GFNI+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1, "Allows VPCLMULQDQ+ hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index a4b6846c3d420..5b021c0b68904 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -86,33 +86,37 @@ enum CORINFO_InstructionSet InstructionSet_APX=41, InstructionSet_AVX10v2=42, InstructionSet_AVX10v2_V512=43, - InstructionSet_X86Base_X64=44, - InstructionSet_SSE_X64=45, - InstructionSet_SSE2_X64=46, - InstructionSet_SSE3_X64=47, - InstructionSet_SSSE3_X64=48, - InstructionSet_SSE41_X64=49, - InstructionSet_SSE42_X64=50, - InstructionSet_AVX_X64=51, - InstructionSet_AVX2_X64=52, - InstructionSet_AES_X64=53, - InstructionSet_BMI1_X64=54, - InstructionSet_BMI2_X64=55, - InstructionSet_FMA_X64=56, - InstructionSet_LZCNT_X64=57, - InstructionSet_PCLMULQDQ_X64=58, - InstructionSet_POPCNT_X64=59, - InstructionSet_AVXVNNI_X64=60, - InstructionSet_X86Serialize_X64=61, - InstructionSet_AVX512F_X64=62, - InstructionSet_AVX512BW_X64=63, - InstructionSet_AVX512CD_X64=64, - InstructionSet_AVX512DQ_X64=65, - InstructionSet_AVX512VBMI_X64=66, - InstructionSet_AVX10v1_X64=67, - InstructionSet_AVX10v1_V512_X64=68, - InstructionSet_AVX10v2_X64=69, - InstructionSet_AVX10v2_V512_X64=70, + InstructionSet_GFNI=44, + InstructionSet_GFNI_V256=45, + InstructionSet_GFNI_V512=46, + InstructionSet_X86Base_X64=47, + InstructionSet_SSE_X64=48, + InstructionSet_SSE2_X64=49, + InstructionSet_SSE3_X64=50, + InstructionSet_SSSE3_X64=51, + InstructionSet_SSE41_X64=52, + InstructionSet_SSE42_X64=53, + InstructionSet_AVX_X64=54, + InstructionSet_AVX2_X64=55, + InstructionSet_AES_X64=56, + InstructionSet_BMI1_X64=57, + InstructionSet_BMI2_X64=58, + InstructionSet_FMA_X64=59, + InstructionSet_LZCNT_X64=60, + InstructionSet_PCLMULQDQ_X64=61, + InstructionSet_POPCNT_X64=62, + InstructionSet_AVXVNNI_X64=63, + InstructionSet_X86Serialize_X64=64, + InstructionSet_AVX512F_X64=65, + InstructionSet_AVX512BW_X64=66, + InstructionSet_AVX512CD_X64=67, + InstructionSet_AVX512DQ_X64=68, + InstructionSet_AVX512VBMI_X64=69, + InstructionSet_AVX10v1_X64=70, + InstructionSet_AVX10v1_V512_X64=71, + InstructionSet_AVX10v2_X64=72, + InstructionSet_AVX10v2_V512_X64=73, + InstructionSet_GFNI_X64=74, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -158,33 +162,37 @@ enum CORINFO_InstructionSet InstructionSet_APX=41, InstructionSet_AVX10v2=42, InstructionSet_AVX10v2_V512=43, - InstructionSet_X86Base_X64=44, - InstructionSet_SSE_X64=45, - InstructionSet_SSE2_X64=46, - InstructionSet_SSE3_X64=47, - InstructionSet_SSSE3_X64=48, - InstructionSet_SSE41_X64=49, - InstructionSet_SSE42_X64=50, - InstructionSet_AVX_X64=51, - InstructionSet_AVX2_X64=52, - InstructionSet_AES_X64=53, - InstructionSet_BMI1_X64=54, - InstructionSet_BMI2_X64=55, - InstructionSet_FMA_X64=56, - InstructionSet_LZCNT_X64=57, - InstructionSet_PCLMULQDQ_X64=58, - InstructionSet_POPCNT_X64=59, - InstructionSet_AVXVNNI_X64=60, - InstructionSet_X86Serialize_X64=61, - InstructionSet_AVX512F_X64=62, - InstructionSet_AVX512BW_X64=63, - InstructionSet_AVX512CD_X64=64, - InstructionSet_AVX512DQ_X64=65, - InstructionSet_AVX512VBMI_X64=66, - InstructionSet_AVX10v1_X64=67, - InstructionSet_AVX10v1_V512_X64=68, - InstructionSet_AVX10v2_X64=69, - InstructionSet_AVX10v2_V512_X64=70, + InstructionSet_GFNI=44, + InstructionSet_GFNI_V256=45, + InstructionSet_GFNI_V512=46, + InstructionSet_X86Base_X64=47, + InstructionSet_SSE_X64=48, + InstructionSet_SSE2_X64=49, + InstructionSet_SSE3_X64=50, + InstructionSet_SSSE3_X64=51, + InstructionSet_SSE41_X64=52, + InstructionSet_SSE42_X64=53, + InstructionSet_AVX_X64=54, + InstructionSet_AVX2_X64=55, + InstructionSet_AES_X64=56, + InstructionSet_BMI1_X64=57, + InstructionSet_BMI2_X64=58, + InstructionSet_FMA_X64=59, + InstructionSet_LZCNT_X64=60, + InstructionSet_PCLMULQDQ_X64=61, + InstructionSet_POPCNT_X64=62, + InstructionSet_AVXVNNI_X64=63, + InstructionSet_X86Serialize_X64=64, + InstructionSet_AVX512F_X64=65, + InstructionSet_AVX512BW_X64=66, + InstructionSet_AVX512CD_X64=67, + InstructionSet_AVX512DQ_X64=68, + InstructionSet_AVX512VBMI_X64=69, + InstructionSet_AVX10v1_X64=70, + InstructionSet_AVX10v1_V512_X64=71, + InstructionSet_AVX10v2_X64=72, + InstructionSet_AVX10v2_V512_X64=73, + InstructionSet_GFNI_X64=74, #endif // TARGET_X86 }; @@ -354,6 +362,8 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_AVX10v2_X64); if (HasInstructionSet(InstructionSet_AVX10v2_V512)) AddInstructionSet(InstructionSet_AVX10v2_V512_X64); + if (HasInstructionSet(InstructionSet_GFNI)) + AddInstructionSet(InstructionSet_GFNI_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 @@ -542,6 +552,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512_X64); + if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_GFNI_X64)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_X64) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE); if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) @@ -618,6 +632,16 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) @@ -736,6 +760,16 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) @@ -984,6 +1018,14 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX10v2_V512"; case InstructionSet_AVX10v2_V512_X64 : return "AVX10v2_V512_X64"; + case InstructionSet_GFNI : + return "GFNI"; + case InstructionSet_GFNI_X64 : + return "GFNI_X64"; + case InstructionSet_GFNI_V256 : + return "GFNI_V256"; + case InstructionSet_GFNI_V512 : + return "GFNI_V512"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -1072,6 +1114,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX10v2"; case InstructionSet_AVX10v2_V512 : return "AVX10v2_V512"; + case InstructionSet_GFNI : + return "GFNI"; + case InstructionSet_GFNI_V256 : + return "GFNI_V256"; + case InstructionSet_GFNI_V512 : + return "GFNI_V512"; #endif // TARGET_X86 default: @@ -1147,6 +1195,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2; case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512; + case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; + case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; + case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -1189,6 +1240,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2; case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512; + case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; + case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; + case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 2f0b67174805a..82fbaf1c0d108 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 9ed85c09-d33d-4855-80ea-e3b7330e8173 */ - 0x9ed85c09, - 0xd33d, - 0x4855, - {0x80, 0xea, 0xe3, 0xb7, 0x33, 0x0e, 0x81, 0x73} +constexpr GUID JITEEVersionIdentifier = { /* 64146448-11b1-4f94-b1f2-edce91fbcb33 */ + 0x64146448, + 0x11b1, + 0x4f94, + {0xb1, 0xf2, 0xed, 0xce, 0x91, 0xfb, 0xcb, 0x33} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 6b7256163b77c..05d534f2eeac7 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -60,6 +60,9 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Pclmulqdq_V512=50, READYTORUN_INSTRUCTION_Avx10v2=51, READYTORUN_INSTRUCTION_Avx10v2_V512=52, + READYTORUN_INSTRUCTION_Gfni=53, + READYTORUN_INSTRUCTION_Gfni_V256=54, + READYTORUN_INSTRUCTION_Gfni_V512=55, }; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 149dea4a3774f..f1db977d09b5a 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6162,6 +6162,13 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, instructionSetFlags.AddInstructionSet(InstructionSet_FMA); } + if (JitConfig.EnableGFNI() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_GFNI); + instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V256); + instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V512); + } + if (JitConfig.EnableLZCNT() != 0) { instructionSetFlags.AddInstructionSet(InstructionSet_LZCNT); diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 5656ac3e68bc4..e4f7b70182167 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1599,6 +1599,12 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const return false; } + case INS_gf2p8affineinvqb: + case INS_gf2p8affineqb: + { + return TakesVexPrefix(ins); + } + default: { unreached(); @@ -19836,6 +19842,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_vpdpwssd: case INS_vpdpbusds: case INS_vpdpwssds: + case INS_gf2p8affineinvqb: + case INS_gf2p8affineqb: + case INS_gf2p8mulb: result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency += PERFSCORE_LATENCY_5C; break; diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index e598d774d9dd9..9480c2d23b6d5 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -809,6 +809,9 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // APX { NI_Illegal, NI_Illegal }, // AVX10v2 { NI_Illegal, NI_Illegal }, // AVX10v2_V512 + { FIRST_NI_GFNI, LAST_NI_GFNI }, + { FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 }, + { FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 }, { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, { FIRST_NI_SSE_X64, LAST_NI_SSE_X64 }, { FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 }, @@ -836,6 +839,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // AVX10v1_V512_X64 { NI_Illegal, NI_Illegal }, // AVX10v2_X64 { NI_Illegal, NI_Illegal }, // AVX10v2_V512_X64 + { NI_Illegal, NI_Illegal }, // GFNI_X64 #elif defined (TARGET_ARM64) { FIRST_NI_ArmBase, LAST_NI_ArmBase }, { FIRST_NI_AdvSimd, LAST_NI_AdvSimd }, diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 80a7093a284d4..a60b8ffd773cc 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1572,6 +1572,39 @@ HARDWARE_INTRINSIC(POPCNT_X64, PopCount, HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) #define LAST_NI_X86Serialize NI_X86Serialize_Serialize +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// GFNI Intrinsics +#define FIRST_NI_GFNI NI_GFNI_GaloisFieldAffineTransform +HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransform, 16, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransformInverse, 16, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI, GaloisFieldMultiply, 16, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +#define LAST_NI_GFNI NI_GFNI_GaloisFieldMultiply + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// GFNI Intrinsics +#define FIRST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldAffineTransform +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransform, 32, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransformInverse, 32, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldMultiply, 32, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +#define LAST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldMultiply + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// GFNI Intrinsics +#define FIRST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldAffineTransform +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransform, 64, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransformInverse, 64, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply, 64, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +#define LAST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldMultiply + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 0139b61921ee4..d4651444bf5e7 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -60,6 +60,8 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_BMI2_X64; case InstructionSet_FMA: return InstructionSet_FMA_X64; + case InstructionSet_GFNI: + return InstructionSet_GFNI_X64; case InstructionSet_LZCNT: return InstructionSet_LZCNT_X64; case InstructionSet_PCLMULQDQ: @@ -112,6 +114,8 @@ static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) { switch (isa) { + case InstructionSet_GFNI: + return InstructionSet_GFNI_V256; case InstructionSet_PCLMULQDQ: return InstructionSet_PCLMULQDQ_V256; default: @@ -135,6 +139,8 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVX10v1_V512; case InstructionSet_AVX10v1_X64: return InstructionSet_AVX10v1_V512_X64; + case InstructionSet_GFNI: + return InstructionSet_GFNI_V512; case InstructionSet_PCLMULQDQ: return InstructionSet_PCLMULQDQ_V512; default: @@ -224,6 +230,13 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) return InstructionSet_FMA; } } + else if (className[0] == 'G') + { + if (strcmp(className, "Gfni") == 0) + { + return InstructionSet_GFNI; + } + } else if (className[0] == 'L') { if (strcmp(className, "Lzcnt") == 0) @@ -898,6 +911,10 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_AVX10v1_V512: case InstructionSet_AVX10v1_V512_X64: case InstructionSet_EVEX: + case InstructionSet_GFNI: + case InstructionSet_GFNI_X64: + case InstructionSet_GFNI_V256: + case InstructionSet_GFNI_V512: { return true; } diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 4442e39b7a885..bfb15df2cf397 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -393,7 +393,7 @@ INST3(psignb, "psignb", IUM_WR, BAD_CODE, BAD_CODE, INST3(psignd, "psignd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0A), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN INST3(psignw, "psignw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x09), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed SIGN -// AESNI & PCLMULQDQ +// AESNI, PCLMULQDQ, & GFNI INST3(aesdec, "aesdec", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDE), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES decryption flow INST3(aesdeclast, "aesdeclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES decryption flow INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDC), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform one round of an AES encryption flow @@ -401,6 +401,9 @@ INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords +INST3(gf2p8affineinvqb, "gf2p8affineinvqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCF), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Galois Field Affine Transformation Inverse +INST3(gf2p8affineqb, "gf2p8affineqb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCE), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Galois Field Affine Transformation +INST3(gf2p8mulb, "gf2p8mulb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xCF), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Multiply Bytes // SSE4.1 INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 36162934bdf1b..faa9b0e7e45bf 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -405,6 +405,7 @@ RELEASE_CONFIG_INTEGER(EnableAVXVNNI, "EnableAVXVNNI", RELEASE_CONFIG_INTEGER(EnableBMI1, "EnableBMI1", 1) // Allows BMI1+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableBMI2, "EnableBMI2", 1) // Allows BMI2+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableFMA, "EnableFMA", 1) // Allows FMA+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableGFNI, "EnableGFNI", 1) // Allows GFNI+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableLZCNT, "EnableLZCNT", 1) // Allows LZCNT+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnablePCLMULQDQ, "EnablePCLMULQDQ", 1) // Allows PCLMULQDQ+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableVPCLMULQDQ, "EnableVPCLMULQDQ", 1) // Allows VPCLMULQDQ+ hardware intrinsics to be disabled diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 8552fc85702f0..8c45736d55536 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -2592,6 +2592,19 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return LowerHWIntrinsicTernaryLogic(node); } + case NI_GFNI_GaloisFieldAffineTransform: + case NI_GFNI_GaloisFieldAffineTransformInverse: + case NI_GFNI_V256_GaloisFieldAffineTransform: + case NI_GFNI_V256_GaloisFieldAffineTransformInverse: + case NI_GFNI_V512_GaloisFieldAffineTransform: + case NI_GFNI_V512_GaloisFieldAffineTransformInverse: + { + // Managed API surfaces these with only UBYTE operands. + // We retype in order to support EVEX embedded broadcast of op2 + node->SetSimdBaseJitType(CORINFO_TYPE_ULONG); + break; + } + default: break; } @@ -9414,6 +9427,12 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_AVX10v1_TernaryLogic: case NI_AVX10v1_V512_Range: case NI_AVX10v1_V512_Reduce: + case NI_GFNI_GaloisFieldAffineTransform: + case NI_GFNI_GaloisFieldAffineTransformInverse: + case NI_GFNI_V256_GaloisFieldAffineTransform: + case NI_GFNI_V256_GaloisFieldAffineTransformInverse: + case NI_GFNI_V512_GaloisFieldAffineTransform: + case NI_GFNI_V512_GaloisFieldAffineTransformInverse: { assert(!supportsSIMDScalarLoads); @@ -11332,6 +11351,12 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX10v1_V512_InsertVector128: case NI_AVX10v1_V512_InsertVector256: case NI_AVX10v1_V512_Range: + case NI_GFNI_GaloisFieldAffineTransform: + case NI_GFNI_GaloisFieldAffineTransformInverse: + case NI_GFNI_V256_GaloisFieldAffineTransform: + case NI_GFNI_V256_GaloisFieldAffineTransformInverse: + case NI_GFNI_V512_GaloisFieldAffineTransform: + case NI_GFNI_V512_GaloisFieldAffineTransformInverse: { if (!isContainedImm) { diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 119cf6a1e6cff..dd3f4ca8cf14d 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -81,6 +81,7 @@ private static class XArchIntrinsicConstants public const int Apx = 0x100000; public const int Vpclmulqdq = 0x200000; public const int Avx10v2 = 0x400000; + public const int Gfni = 0x800000; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -150,6 +151,14 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("avx10v2"); if (((flags & Avx10v2) != 0) && ((flags & Avx512) != 0)) builder.AddSupportedInstructionSet("avx10v2_v512"); + if ((flags & Gfni) != 0) + { + builder.AddSupportedInstructionSet("gfni"); + if ((flags & Avx) != 0) + builder.AddSupportedInstructionSet("gfni_v256"); + if ((flags & Avx512) != 0) + builder.AddSupportedInstructionSet("gfni_v512"); + } } public static int FromInstructionSet(InstructionSet instructionSet) @@ -219,6 +228,10 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVX10v2_X64 => Avx10v2, InstructionSet.X64_AVX10v2_V512 => (Avx10v2 | Avx512), InstructionSet.X64_AVX10v2_V512_X64 => (Avx10v2 | Avx512), + InstructionSet.X64_GFNI => Gfni, + InstructionSet.X64_GFNI_X64 => Gfni, + InstructionSet.X64_GFNI_V256 => (Gfni | Avx), + InstructionSet.X64_GFNI_V512 => (Gfni | Avx512), // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 0c45280a51cca..1d2a4074a5458 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -358,13 +358,23 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, if (_supportedInstructionSets.Contains("avx10v1")) _supportedInstructionSets.Add("avx10v1_v512"); + if (_supportedInstructionSets.Contains("avx10v2")) + _supportedInstructionSets.Add("avx10v2_v512"); + + if (_supportedInstructionSets.Contains("gfni")) + _supportedInstructionSets.Add("gfni_v512"); + if (_supportedInstructionSets.Contains("vpclmul")) _supportedInstructionSets.Add("vpclmul_v512"); + } - // Having AVX10V2 and any AVX-512 instruction sets enabled, - // automatically implies AVX10V2-V512 as well. - if (_supportedInstructionSets.Contains("avx10v2")) - _supportedInstructionSets.Add("avx10v2_v512"); + if (_supportedInstructionSets.Any(iSet => iSet.Contains("avx"))) + { + // These ISAs should automatically extend to 256-bit if + // AVX is enabled. + + if (_supportedInstructionSets.Contains("gfni")) + _supportedInstructionSets.Add("gfni_v256"); } foreach (string supported in _supportedInstructionSets) diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index d412ee5598881..14940fc74db9e 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -193,6 +193,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("popcnt"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lzcnt"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("serialize"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni"); // If AVX was enabled, we can opportunistically enable instruction sets which use the VEX encodings Debug.Assert(InstructionSet.X64_AVX == InstructionSet.X86_AVX); @@ -210,6 +211,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v256"); } Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F); @@ -230,6 +232,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2_v512"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512"); } } else if (targetArchitecture == TargetArchitecture.ARM64) diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index afcfd808bffac..d486d9338bf7a 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -63,6 +63,9 @@ public enum ReadyToRunInstructionSet Pclmulqdq_V512=50, Avx10v2=51, Avx10v2_V512=52, + Gfni=53, + Gfni_V256=54, + Gfni_V512=55, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 9c4042e255b41..5e50f7cc00b7b 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -127,6 +127,10 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_AVX10v2_X64: return ReadyToRunInstructionSet.Avx10v2; case InstructionSet.X64_AVX10v2_V512: return ReadyToRunInstructionSet.Avx10v2_V512; case InstructionSet.X64_AVX10v2_V512_X64: return ReadyToRunInstructionSet.Avx10v2_V512; + case InstructionSet.X64_GFNI: return ReadyToRunInstructionSet.Gfni; + case InstructionSet.X64_GFNI_X64: return ReadyToRunInstructionSet.Gfni; + case InstructionSet.X64_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; + case InstructionSet.X64_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512; default: throw new Exception("Unknown instruction set"); } @@ -206,6 +210,10 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_AVX10v2_X64: return null; case InstructionSet.X86_AVX10v2_V512: return ReadyToRunInstructionSet.Avx10v2_V512; case InstructionSet.X86_AVX10v2_V512_X64: return null; + case InstructionSet.X86_GFNI: return ReadyToRunInstructionSet.Gfni; + case InstructionSet.X86_GFNI_X64: return null; + case InstructionSet.X86_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; + case InstructionSet.X86_GFNI_V512: return ReadyToRunInstructionSet.Gfni_V512; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 18d9c54a11ec4..e4f67b4070fe4 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -84,6 +84,9 @@ public enum InstructionSet X64_APX = InstructionSet_X64.APX, X64_AVX10v2 = InstructionSet_X64.AVX10v2, X64_AVX10v2_V512 = InstructionSet_X64.AVX10v2_V512, + X64_GFNI = InstructionSet_X64.GFNI, + X64_GFNI_V256 = InstructionSet_X64.GFNI_V256, + X64_GFNI_V512 = InstructionSet_X64.GFNI_V512, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE_X64 = InstructionSet_X64.SSE_X64, X64_SSE2_X64 = InstructionSet_X64.SSE2_X64, @@ -111,6 +114,7 @@ public enum InstructionSet X64_AVX10v1_V512_X64 = InstructionSet_X64.AVX10v1_V512_X64, X64_AVX10v2_X64 = InstructionSet_X64.AVX10v2_X64, X64_AVX10v2_V512_X64 = InstructionSet_X64.AVX10v2_V512_X64, + X64_GFNI_X64 = InstructionSet_X64.GFNI_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, X86_SSE2 = InstructionSet_X86.SSE2, @@ -154,6 +158,9 @@ public enum InstructionSet X86_APX = InstructionSet_X86.APX, X86_AVX10v2 = InstructionSet_X86.AVX10v2, X86_AVX10v2_V512 = InstructionSet_X86.AVX10v2_V512, + X86_GFNI = InstructionSet_X86.GFNI, + X86_GFNI_V256 = InstructionSet_X86.GFNI_V256, + X86_GFNI_V512 = InstructionSet_X86.GFNI_V512, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE_X64 = InstructionSet_X86.SSE_X64, X86_SSE2_X64 = InstructionSet_X86.SSE2_X64, @@ -181,6 +188,7 @@ public enum InstructionSet X86_AVX10v1_V512_X64 = InstructionSet_X86.AVX10v1_V512_X64, X86_AVX10v2_X64 = InstructionSet_X86.AVX10v2_X64, X86_AVX10v2_V512_X64 = InstructionSet_X86.AVX10v2_V512_X64, + X86_GFNI_X64 = InstructionSet_X86.GFNI_X64, } public enum InstructionSet_ARM64 { @@ -260,33 +268,37 @@ public enum InstructionSet_X64 APX = 41, AVX10v2 = 42, AVX10v2_V512 = 43, - X86Base_X64 = 44, - SSE_X64 = 45, - SSE2_X64 = 46, - SSE3_X64 = 47, - SSSE3_X64 = 48, - SSE41_X64 = 49, - SSE42_X64 = 50, - AVX_X64 = 51, - AVX2_X64 = 52, - AES_X64 = 53, - BMI1_X64 = 54, - BMI2_X64 = 55, - FMA_X64 = 56, - LZCNT_X64 = 57, - PCLMULQDQ_X64 = 58, - POPCNT_X64 = 59, - AVXVNNI_X64 = 60, - X86Serialize_X64 = 61, - AVX512F_X64 = 62, - AVX512BW_X64 = 63, - AVX512CD_X64 = 64, - AVX512DQ_X64 = 65, - AVX512VBMI_X64 = 66, - AVX10v1_X64 = 67, - AVX10v1_V512_X64 = 68, - AVX10v2_X64 = 69, - AVX10v2_V512_X64 = 70, + GFNI = 44, + GFNI_V256 = 45, + GFNI_V512 = 46, + X86Base_X64 = 47, + SSE_X64 = 48, + SSE2_X64 = 49, + SSE3_X64 = 50, + SSSE3_X64 = 51, + SSE41_X64 = 52, + SSE42_X64 = 53, + AVX_X64 = 54, + AVX2_X64 = 55, + AES_X64 = 56, + BMI1_X64 = 57, + BMI2_X64 = 58, + FMA_X64 = 59, + LZCNT_X64 = 60, + PCLMULQDQ_X64 = 61, + POPCNT_X64 = 62, + AVXVNNI_X64 = 63, + X86Serialize_X64 = 64, + AVX512F_X64 = 65, + AVX512BW_X64 = 66, + AVX512CD_X64 = 67, + AVX512DQ_X64 = 68, + AVX512VBMI_X64 = 69, + AVX10v1_X64 = 70, + AVX10v1_V512_X64 = 71, + AVX10v2_X64 = 72, + AVX10v2_V512_X64 = 73, + GFNI_X64 = 74, } public enum InstructionSet_X86 @@ -336,33 +348,37 @@ public enum InstructionSet_X86 APX = 41, AVX10v2 = 42, AVX10v2_V512 = 43, - X86Base_X64 = 44, - SSE_X64 = 45, - SSE2_X64 = 46, - SSE3_X64 = 47, - SSSE3_X64 = 48, - SSE41_X64 = 49, - SSE42_X64 = 50, - AVX_X64 = 51, - AVX2_X64 = 52, - AES_X64 = 53, - BMI1_X64 = 54, - BMI2_X64 = 55, - FMA_X64 = 56, - LZCNT_X64 = 57, - PCLMULQDQ_X64 = 58, - POPCNT_X64 = 59, - AVXVNNI_X64 = 60, - X86Serialize_X64 = 61, - AVX512F_X64 = 62, - AVX512BW_X64 = 63, - AVX512CD_X64 = 64, - AVX512DQ_X64 = 65, - AVX512VBMI_X64 = 66, - AVX10v1_X64 = 67, - AVX10v1_V512_X64 = 68, - AVX10v2_X64 = 69, - AVX10v2_V512_X64 = 70, + GFNI = 44, + GFNI_V256 = 45, + GFNI_V512 = 46, + X86Base_X64 = 47, + SSE_X64 = 48, + SSE2_X64 = 49, + SSE3_X64 = 50, + SSSE3_X64 = 51, + SSE41_X64 = 52, + SSE42_X64 = 53, + AVX_X64 = 54, + AVX2_X64 = 55, + AES_X64 = 56, + BMI1_X64 = 57, + BMI2_X64 = 58, + FMA_X64 = 59, + LZCNT_X64 = 60, + PCLMULQDQ_X64 = 61, + POPCNT_X64 = 62, + AVXVNNI_X64 = 63, + X86Serialize_X64 = 64, + AVX512F_X64 = 65, + AVX512BW_X64 = 66, + AVX512CD_X64 = 67, + AVX512DQ_X64 = 68, + AVX512VBMI_X64 = 69, + AVX10v1_X64 = 70, + AVX10v1_V512_X64 = 71, + AVX10v2_X64 = 72, + AVX10v2_V512_X64 = 73, + GFNI_X64 = 74, } public unsafe struct InstructionSetFlags : IEnumerable @@ -690,6 +706,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -766,6 +786,16 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X64_SSE41); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V256)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1)) resultflags.AddInstructionSet(InstructionSet.X64_EVEX); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) @@ -885,6 +915,16 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize)) resultflags.AddInstructionSet(InstructionSet.X86_X86Base); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X86_SSE41); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V256)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1)) resultflags.AddInstructionSet(InstructionSet.X86_EVEX); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) @@ -1045,6 +1085,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) @@ -1121,6 +1163,16 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X64_SSE41)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1)) @@ -1240,6 +1292,16 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize); + if (resultflags.HasInstructionSet(InstructionSet.X86_SSE41)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V256); + if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1)) @@ -1399,6 +1461,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X64_APX, true); yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X64_AVX10v2, true); yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", InstructionSet.X64_AVX10v2_V512, true); + yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X64_GFNI, true); + yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X64_GFNI_V256, true); + yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X64_GFNI_V512, true); break; case TargetArchitecture.X86: @@ -1445,6 +1510,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X86_APX, true); yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X86_AVX10v2, true); yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", InstructionSet.X86_AVX10v2_V512, true); + yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X86_GFNI, true); + yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X86_GFNI_V256, true); + yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X86_GFNI_V512, true); break; } } @@ -1530,6 +1598,8 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_AVX10v2_X64); if (HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64); + if (HasInstructionSet(InstructionSet.X64_GFNI)) + AddInstructionSet(InstructionSet.X64_GFNI_X64); break; case TargetArchitecture.X86: @@ -1582,6 +1652,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); AddInstructionSet(InstructionSet.X64_AVX10v2_X64); AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64); + AddInstructionSet(InstructionSet.X64_GFNI_X64); break; case TargetArchitecture.X86: @@ -1612,6 +1683,7 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_AVX10v1_V512_X64); AddInstructionSet(InstructionSet.X86_AVX10v2_X64); AddInstructionSet(InstructionSet.X86_AVX10v2_V512_X64); + AddInstructionSet(InstructionSet.X86_GFNI_X64); break; } } @@ -1928,6 +2000,18 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_AVX10v2; } + case "Gfni": + if (nestedTypeName == "X64") + { return InstructionSet.X64_GFNI_X64; } + else + if (nestedTypeName == "V256") + { return InstructionSet.X64_GFNI_V256; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X64_GFNI_V512; } + else + { return InstructionSet.X64_GFNI; } + } break; @@ -2055,6 +2139,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X86_AVX10v2; } + case "Gfni": + if (nestedTypeName == "V256") + { return InstructionSet.X86_GFNI_V256; } + else + if (nestedTypeName == "V512") + { return InstructionSet.X86_GFNI_V512; } + else + { return InstructionSet.X86_GFNI; } + } break; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 99a15a2cacfe1..d206c3056f813 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -70,6 +70,9 @@ instructionset ,X86 ,VectorT512 , ,41 ,VectorT512 instructionset ,X86 ,Apx , ,48 ,APX ,apx instructionset ,X86 ,Avx10v2 , ,51 ,AVX10v2 ,avx10v2 instructionset ,X86 ,Avx10v2_V512 , ,52 ,AVX10v2_V512 ,avx10v2_v512 +instructionset ,X86 ,Gfni , ,53 ,GFNI ,gfni +instructionset ,X86 ,Gfni_V256 , ,54 ,GFNI_V256 ,gfni_v256 +instructionset ,X86 ,Gfni_V512 , ,55 ,GFNI_V512 ,gfni_v512 instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -98,6 +101,7 @@ instructionset64bit,X86 ,AVX10v1 instructionset64bit,X86 ,AVX10v1_V512 instructionset64bit,X86 ,AVX10v2 instructionset64bit,X86 ,AVX10v2_V512 +instructionset64bit,X86 ,GFNI vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 @@ -155,6 +159,11 @@ implication ,X86 ,PCLMULQDQ_V512 ,PCLMULQDQ_V256 implication ,X86 ,PCLMULQDQ_V512 ,AVX512F implication ,X86 ,AVXVNNI ,AVX2 implication ,X86 ,X86Serialize ,X86Base +implication ,X86 ,GFNI ,SSE41 +implication ,X86 ,GFNI_V256 ,GFNI +implication ,X86 ,GFNI_V256 ,AVX +implication ,X86 ,GFNI_V512 ,GFNI +implication ,X86 ,GFNI_V512 ,AVX512F implication ,X86 ,AVX10v1 ,EVEX implication ,X86 ,AVX10v1_V512 ,AVX10v1 diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 2dab2b5edaf3a..fc91f9df26fe4 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1408,6 +1408,13 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_X86Serialize); } + if (((cpuFeatures & XArchIntrinsicConstants_Gfni) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableGFNI)) + { + CPUCompileFlags.Set(InstructionSet_GFNI); + CPUCompileFlags.Set(InstructionSet_GFNI_V256); + CPUCompileFlags.Set(InstructionSet_GFNI_V512); + } + if (((cpuFeatures & XArchIntrinsicConstants_Evex) != 0) && ((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0)) { @@ -1415,11 +1422,7 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Set(InstructionSet_EVEX); CPUCompileFlags.Set(InstructionSet_AVX10v1); - - if((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) - { - CPUCompileFlags.Set(InstructionSet_AVX10v1_V512); - } + CPUCompileFlags.Set(InstructionSet_AVX10v1_V512); } } @@ -1428,10 +1431,7 @@ void EEJitManager::SetCpuInfo() if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v2)) { CPUCompileFlags.Set(InstructionSet_AVX10v2); - if((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) - { - CPUCompileFlags.Set(InstructionSet_AVX10v2_V512); - } + CPUCompileFlags.Set(InstructionSet_AVX10v2_V512); } } diff --git a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml index c3b4fb52768d3..9e3e8a1d6e720 100644 --- a/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml +++ b/src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml @@ -30,6 +30,9 @@ + + + @@ -99,6 +102,18 @@ + + + + + + + + + + + + diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index b8cb658322f89..ff2f86e7106fa 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2627,6 +2627,7 @@ + @@ -2654,6 +2655,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.PlatformNotSupported.cs new file mode 100644 index 0000000000000..d16e7275a8e9d --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.PlatformNotSupported.cs @@ -0,0 +1,117 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// Provides access to X86 GFNI hardware instructions via intrinsics. + [CLSCompliant(false)] + public abstract class Gfni : Sse41 + { + internal Gfni() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { [Intrinsic] get => false; } + + /// Provides access to the X86 GFNI hardware instructions that are only available to 64-bit processes, via intrinsics. + public new abstract class X64 : Sse41.X64 + { + internal X64() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { [Intrinsic] get => false; } + } + + public abstract class V256 + { + internal V256() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get => false; } + + /// + /// __m256i _mm256_gf2p8affineinv_epi64_epi8 (__m256i x, __m256i A, int b) + /// GF2P8AFFINEINVQB ymm1, ymm2/m256, imm8 + /// VGF2P8AFFINEINVQB ymm1, ymm2, ymm3/m256, imm8 + /// VGF2P8AFFINEINVQB ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 + /// + public static Vector256 GaloisFieldAffineTransformInverse(Vector256 x, Vector256 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m256i _mm256_gf2p8affine_epi64_epi8 (__m256i x, __m256i A, int b) + /// GF2P8AFFINEQB ymm1, ymm2/m256, imm8 + /// VGF2P8AFFINEQB ymm1, ymm2, ymm3/m256, imm8 + /// VGF2P8AFFINEQB ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 + /// + public static Vector256 GaloisFieldAffineTransform(Vector256 x, Vector256 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m256i _mm256_gf2p8mul_epi8 (__m256i a, __m256i b) + /// GF2P8MULB ymm1, ymm2/m256 + /// VGF2P8MULB ymm1, ymm2, ymm3/m256 + /// VGF2P8MULB ymm1{k1}{z}, ymm2, ymm3/m256 + /// + public static Vector256 GaloisFieldMultiply(Vector256 left, Vector256 right) => throw new PlatformNotSupportedException(); + } + + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { [Intrinsic] get => false; } + + /// + /// __m512i _mm512_gf2p8affineinv_epi64_epi8 (__m512i x, __m512i A, int b) + /// GF2P8AFFINEINVQB zmm1, zmm2/m512, imm8 + /// VGF2P8AFFINEINVQB zmm1, zmm2, zmm3/m512, imm8 + /// VGF2P8AFFINEINVQB zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst, imm8 + /// + public static Vector512 GaloisFieldAffineTransformInverse(Vector512 x, Vector512 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m512i _mm512_gf2p8affine_epi64_epi8 (__m512i x, __m512i A, int b) + /// GF2P8AFFINEQB zmm1, zmm2/m512, imm8 + /// VGF2P8AFFINEQB zmm1, zmm2, zmm3/m512, imm8 + /// VGF2P8AFFINEQB zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst, imm8 + /// + public static Vector512 GaloisFieldAffineTransform(Vector512 x, Vector512 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m512i _mm512_gf2p8mul_epi8 (__m512i a, __m512i b) + /// GF2P8MULB zmm1, zmm2/m512 + /// VGF2P8MULB zmm1, zmm2, zmm3/m512 + /// VGF2P8MULB zmm1{k1}{z}, zmm2, zmm3/m512 + /// + public static Vector512 GaloisFieldMultiply(Vector512 left, Vector512 right) => throw new PlatformNotSupportedException(); + } + + /// + /// __m128i _mm_gf2p8affineinv_epi64_epi8 (__m128i x, __m128i A, int b) + /// GF2P8AFFINEINVQB xmm1, xmm2/m128, imm8 + /// VGF2P8AFFINEINVQB xmm1, xmm2, xmm3/m128, imm8 + /// VGF2P8AFFINEINVQB xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 GaloisFieldAffineTransformInverse(Vector128 x, Vector128 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m128i _mm_gf2p8affine_epi64_epi8 (__m128i x, __m128i A, int b) + /// GF2P8AFFINEQB xmm1, xmm2/m128, imm8 + /// VGF2P8AFFINEQB xmm1, xmm2, xmm3/m128, imm8 + /// VGF2P8AFFINEQB xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 GaloisFieldAffineTransform(Vector128 x, Vector128 a, [ConstantExpected] byte b) => throw new PlatformNotSupportedException(); + /// + /// __m128i _mm_gf2p8mul_epi8 (__m128i a, __m128i b) + /// GF2P8MULB xmm1, xmm2/m128 + /// VGF2P8MULB xmm1, xmm2, xmm3/m128 + /// VGF2P8MULB xmm1{k1}{z}, xmm2, xmm3/m128 + /// + public static Vector128 GaloisFieldMultiply(Vector128 left, Vector128 right) => throw new PlatformNotSupportedException(); + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.cs new file mode 100644 index 0000000000000..4496d9e821bb6 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Gfni.cs @@ -0,0 +1,121 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + /// Provides access to X86 GFNI hardware instructions via intrinsics. + [Intrinsic] + [CLSCompliant(false)] + public abstract class Gfni : Sse41 + { + internal Gfni() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { get => IsSupported; } + + /// Provides access to the X86 GFNI hardware instructions that are only available to 64-bit processes, via intrinsics. + [Intrinsic] + public new abstract class X64 : Sse41.X64 + { + internal X64() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { get => IsSupported; } + } + + [Intrinsic] + public abstract class V256 + { + internal V256() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + /// + /// __m256i _mm256_gf2p8affineinv_epi64_epi8 (__m256i x, __m256i A, int b) + /// GF2P8AFFINEINVQB ymm1, ymm2/m256, imm8 + /// VGF2P8AFFINEINVQB ymm1, ymm2, ymm3/m256, imm8 + /// VGF2P8AFFINEINVQB ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 + /// + public static Vector256 GaloisFieldAffineTransformInverse(Vector256 x, Vector256 a, [ConstantExpected] byte b) => GaloisFieldAffineTransformInverse(x, a, b); + /// + /// __m256i _mm256_gf2p8affine_epi64_epi8 (__m256i x, __m256i A, int b) + /// GF2P8AFFINEQB ymm1, ymm2/m256, imm8 + /// VGF2P8AFFINEQB ymm1, ymm2, ymm3/m256, imm8 + /// VGF2P8AFFINEQB ymm1{k1}{z}, ymm2, ymm3/m256/m64bcst, imm8 + /// + public static Vector256 GaloisFieldAffineTransform(Vector256 x, Vector256 a, [ConstantExpected] byte b) => GaloisFieldAffineTransform(x, a, b); + /// + /// __m256i _mm256_gf2p8mul_epi8 (__m256i a, __m256i b) + /// GF2P8MULB ymm1, ymm2/m256 + /// VGF2P8MULB ymm1, ymm2, ymm3/m256 + /// VGF2P8MULB ymm1{k1}{z}, ymm2, ymm3/m256 + /// + public static Vector256 GaloisFieldMultiply(Vector256 left, Vector256 right) => GaloisFieldMultiply(left, right); + } + + [Intrinsic] + public abstract class V512 + { + internal V512() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static bool IsSupported { get => IsSupported; } + + /// + /// __m512i _mm512_gf2p8affineinv_epi64_epi8 (__m512i x, __m512i A, int b) + /// GF2P8AFFINEINVQB zmm1, zmm2/m512, imm8 + /// VGF2P8AFFINEINVQB zmm1, zmm2, zmm3/m512, imm8 + /// VGF2P8AFFINEINVQB zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst, imm8 + /// + public static Vector512 GaloisFieldAffineTransformInverse(Vector512 x, Vector512 a, [ConstantExpected] byte b) => GaloisFieldAffineTransformInverse(x, a, b); + /// + /// __m512i _mm512_gf2p8affine_epi64_epi8 (__m512i x, __m512i A, int b) + /// GF2P8AFFINEQB zmm1, zmm2/m512, imm8 + /// VGF2P8AFFINEQB zmm1, zmm2, zmm3/m512, imm8 + /// VGF2P8AFFINEQB zmm1{k1}{z}, zmm2, zmm3/m512/m64bcst, imm8 + /// + public static Vector512 GaloisFieldAffineTransform(Vector512 x, Vector512 a, [ConstantExpected] byte b) => GaloisFieldAffineTransform(x, a, b); + /// + /// __m512i _mm512_gf2p8mul_epi8 (__m512i a, __m512i b) + /// GF2P8MULB zmm1, zmm2/m512 + /// VGF2P8MULB zmm1, zmm2, zmm3/m512 + /// VGF2P8MULB zmm1{k1}{z}, zmm2, zmm3/m512 + /// + public static Vector512 GaloisFieldMultiply(Vector512 left, Vector512 right) => GaloisFieldMultiply(left, right); + } + + /// + /// __m128i _mm_gf2p8affineinv_epi64_epi8 (__m128i x, __m128i A, int b) + /// GF2P8AFFINEINVQB xmm1, xmm2/m128, imm8 + /// VGF2P8AFFINEINVQB xmm1, xmm2, xmm3/m128, imm8 + /// VGF2P8AFFINEINVQB xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 GaloisFieldAffineTransformInverse(Vector128 x, Vector128 a, [ConstantExpected] byte b) => GaloisFieldAffineTransformInverse(x, a, b); + /// + /// __m128i _mm_gf2p8affine_epi64_epi8 (__m128i x, __m128i A, int b) + /// GF2P8AFFINEQB xmm1, xmm2/m128, imm8 + /// VGF2P8AFFINEQB xmm1, xmm2, xmm3/m128, imm8 + /// VGF2P8AFFINEQB xmm1{k1}{z}, xmm2, xmm3/m128/m64bcst, imm8 + /// + public static Vector128 GaloisFieldAffineTransform(Vector128 x, Vector128 a, [ConstantExpected] byte b) => GaloisFieldAffineTransform(x, a, b); + /// + /// __m128i _mm_gf2p8mul_epi8 (__m128i a, __m128i b) + /// GF2P8MULB xmm1, xmm2/m128 + /// VGF2P8MULB xmm1, xmm2, xmm3/m128 + /// VGF2P8MULB xmm1{k1}{z}, xmm2, xmm3/m128 + /// + public static Vector128 GaloisFieldMultiply(Vector128 left, Vector128 right) => GaloisFieldMultiply(left, right); + } +} diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 5cc8110d47bbc..20aae5b67f0e7 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -9176,6 +9176,37 @@ internal X64() { } public static new bool IsSupported { get { throw null; } } } } + + [System.CLSCompliantAttribute(false)] + public abstract partial class Gfni : System.Runtime.Intrinsics.X86.Sse41 + { + internal Gfni() { } + public static new bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector128 GaloisFieldAffineTransformInverse(System.Runtime.Intrinsics.Vector128 x, System.Runtime.Intrinsics.Vector128 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector128 GaloisFieldAffineTransform(System.Runtime.Intrinsics.Vector128 x, System.Runtime.Intrinsics.Vector128 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector128 GaloisFieldMultiply(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Sse41.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + public abstract partial class V256 + { + internal V256() { } + public static bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector256 GaloisFieldAffineTransformInverse(System.Runtime.Intrinsics.Vector256 x, System.Runtime.Intrinsics.Vector256 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector256 GaloisFieldAffineTransform(System.Runtime.Intrinsics.Vector256 x, System.Runtime.Intrinsics.Vector256 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector256 GaloisFieldMultiply(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + } + public abstract partial class V512 + { + internal V512() { } + public static bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector512 GaloisFieldAffineTransformInverse(System.Runtime.Intrinsics.Vector512 x, System.Runtime.Intrinsics.Vector512 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector512 GaloisFieldAffineTransform(System.Runtime.Intrinsics.Vector512 x, System.Runtime.Intrinsics.Vector512 a, [System.Diagnostics.CodeAnalysis.ConstantExpectedAttribute] byte b) { throw null; } + public static System.Runtime.Intrinsics.Vector512 GaloisFieldMultiply(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + } + } } namespace System.Runtime.Intrinsics.Wasm { diff --git a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml index 7050d50dc71c5..1e99c7fba5fef 100644 --- a/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml +++ b/src/mono/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.Intrinsics.x86.xml @@ -81,6 +81,18 @@ + + + + + + + + + + + + diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index e82cf1b59228b..9c3661fde1fea 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -271,6 +271,11 @@ int minipal_getcpufeatures(void) { __cpuidex(cpuidInfo, 0x00000007, 0x00000000); + if ((cpuidInfo[CPUID_ECX] & (1 << 8)) != 0) // GFNI + { + result |= XArchIntrinsicConstants_Gfni; + } + if ((cpuidInfo[CPUID_ECX] & (1 << 10)) != 0) // VPCLMULQDQ { result |= XArchIntrinsicConstants_Vpclmulqdq; diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 1a48dbb92dd60..d67d5e0aa9866 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -34,6 +34,7 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Apx = 0x100000, XArchIntrinsicConstants_Vpclmulqdq = 0x200000, XArchIntrinsicConstants_Avx10v2 = 0x400000, + XArchIntrinsicConstants_Gfni = 0x800000, }; #endif // HOST_X86 || HOST_AMD64 diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs index ab266063f1040..60eb2f2c9e822 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs @@ -83,6 +83,44 @@ succeeded = (expectedResult == result);"; +const string GaloisFieldMultiplyTest_ValidationLogic = @"static {RetBaseType} GaloisFieldMultiply({Op1BaseType} a, {Op1BaseType} b) + { + int ret = 0; + for (int i = 0; i < 8; i++) + { + if (((b >> i) & 1) != 0) + { + ret ^= a << i; + } + } + + for (int i = 14; i >= 8; i--) + { + if (((ret >> i) & 1) != 0) + { + ret ^= 0x11B << (i - 8); + } + } + + return ({RetBaseType})ret; + } + + if ({ValidateFirstResult} && (!mask.HasValue || ((left[0] != default) && result[0] != mask.Value))) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults} && (!mask.HasValue || ((left[i] != default) && result[i] != mask.Value))) + { + succeeded = false; + break; + } + } + }"; + const string HorizontalOpTest_ValidationLogic = @"for (var outer = 0; outer < (LargestVectorSize / 16); outer++) { for (var inner = 0; inner < (8 / sizeof({RetBaseType})); inner++) @@ -190,6 +228,7 @@ (string templateFileName, string outputTemplateName, Dictionary templateData)[] Templates = new[] { ("_BinaryOpTestTemplate.template", "AlternatingBinOpTest.template", new Dictionary { ["TemplateName"] = "Alternating", ["TemplateValidationLogic"] = AlternatingBinOpTest_ValidationLogic }), + ("_BinaryOpTestTemplate.template", "GfniMultiplyTest.template", new Dictionary { ["TemplateName"] = "GfniMultiply",["TemplateValidationLogic"] = GaloisFieldMultiplyTest_ValidationLogic }), ("_BinaryOpTestTemplate.template", "HorizontalBinOpTest.template", new Dictionary { ["TemplateName"] = "Horizontal", ["TemplateValidationLogic"] = HorizontalOpTest_ValidationLogic }), ("_BinaryOpTestTemplate.template", "SimpleBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleBinOpTest_ValidationLogic }), ("_BooleanBinaryOpTestTemplate.template", "BooleanBinOpTest.template", new Dictionary { ["TemplateName"] = "Boolean", ["TemplateValidationLogic"] = BooleanOpTest_ValidationLogic }), @@ -3511,6 +3550,39 @@ ("ScalarTernOpTupleBinRetTest.template", new Dictionary { ["Isa"] = "X86Base.X64", ["Method"] = "DivRem", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["Op3BaseType"] = "UInt64", ["NextValueOp1"] = "1", ["NextValueOp2"] = "1", ["NextValueOp3"] = "0x8000000000000000", ["ValidateResult"] = "ulong expectedQuotient = 2; ulong expectedReminder = 1; isUnexpectedResult = (expectedQuotient != ret1) || (expectedReminder != ret2);" }), }; +(string templateFileName, Dictionary templateData)[] GfniInputs = new[] +{ + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != left[0] >>> 1", ["ValidateRemainingResults"] = "result[i] != left[i] >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (left[0] >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (left[i] >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(left[0], new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(left[i], new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != Invert(left[0]) >>> 1", ["ValidateRemainingResults"] = "result[i] != Invert(left[i]) >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (Invert(left[0]) >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (Invert(left[i]) >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(Invert(left[0]), new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(Invert(left[i]), new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniMultiplyTest.template", new Dictionary { ["Isa"] = "Gfni", ["LoadIsa"] = "Sse2", ["Method"] = "GaloisFieldMultiply", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != GaloisFieldMultiply(left[0], right[0])", ["ValidateRemainingResults"] = "result[i] != GaloisFieldMultiply(left[i], right[i])"}), +}; + +(string templateFileName, Dictionary templateData)[] GfniV256Inputs = new[] +{ + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != left[0] >>> 1", ["ValidateRemainingResults"] = "result[i] != left[i] >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (left[0] >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (left[i] >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(left[0], new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(left[i], new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != Invert(left[0]) >>> 1", ["ValidateRemainingResults"] = "result[i] != Invert(left[i]) >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (Invert(left[0]) >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (Invert(left[i]) >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(Invert(left[0]), new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(Invert(left[i]), new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniMultiplyTest.template", new Dictionary { ["Isa"] = "Gfni.V256", ["LoadIsa"] = "Avx", ["Method"] = "GaloisFieldMultiply", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != GaloisFieldMultiply(left[0], right[0])", ["ValidateRemainingResults"] = "result[i] != GaloisFieldMultiply(left[i], right[i])"}), +}; + +(string templateFileName, Dictionary templateData)[] GfniV512Inputs = new[] +{ + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != left[0] >>> 1", ["ValidateRemainingResults"] = "result[i] != left[i] >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (left[0] >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (left[i] >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransform", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(left[0], new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(left[i], new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "0", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != Invert(left[0]) >>> 1", ["ValidateRemainingResults"] = "result[i] != Invert(left[i]) >>> 1"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "NextMatrixByte(i)", ["ValidateFirstResult"] = "result[0] != (Invert(left[0]) >>> 1 ^ 1)", ["ValidateRemainingResults"] = "result[i] != (Invert(left[i]) >>> 1 ^ 1)"}), + ("GfniAffineTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldAffineTransformInverse", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "170", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != AffineTransform(Invert(left[0]), new ReadOnlySpan(right, 0, 8), 170)", ["ValidateRemainingResults"] = "result[i] != AffineTransform(Invert(left[i]), new ReadOnlySpan(right, i & ~7, 8), 170)"}), + ("GfniMultiplyTest.template", new Dictionary { ["Isa"] = "Gfni.V512", ["LoadIsa"] = "Avx512F", ["Method"] = "GaloisFieldMultiply", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != GaloisFieldMultiply(left[0], right[0])", ["ValidateRemainingResults"] = "result[i] != GaloisFieldMultiply(left[i], right[i])"}), +}; + Dictionary extraHelperFiles = new Dictionary { ["SseVerify"] = @"..\..\X86\Shared\SseVerify.cs", @@ -3536,7 +3608,7 @@ bool isImmTemplate(string name) name == "ExtractVector128Test.template" || name == "InsertLoadTest.template" || name == "ExtractStoreTest.template" || name == "ImmBinOpTest.template" || name == "AesImmOpTest.template" || name == "PclmulqdqOpTest.template" || - name == "ImmTernOpTest.template"; + name == "ImmTernOpTest.template" || name == "GfniAffineTest.template"; } string projectName = args[0]; @@ -3592,6 +3664,9 @@ bool isImmTemplate(string name) ProcessInputs("Bmi2.X64", Bmi2X64Inputs); ProcessInputs("X86Base", X86BaseInputs); ProcessInputs("X86Base.X64", X86BaseX64Inputs); +ProcessInputs("Gfni", GfniInputs); +ProcessInputs("Gfni.V256", GfniV256Inputs); +ProcessInputs("Gfni.V512", GfniV512Inputs); void ProcessInputs(string groupName, (string templateFileName, Dictionary templateData)[] inputs) { diff --git a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs index f03c8671ae9ae..18241d9ecfc40 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/General/IsSupported.cs @@ -77,6 +77,10 @@ public static void IsSupported() Convert.ToBoolean(typeof(AvxVnni.X64).GetMethod(issupported).Invoke(null, null)) != AvxVnni.X64.IsSupported || Convert.ToBoolean(typeof(Fma).GetMethod(issupported).Invoke(null, null)) != Fma.IsSupported || Convert.ToBoolean(typeof(Fma.X64).GetMethod(issupported).Invoke(null, null)) != Fma.X64.IsSupported || + Convert.ToBoolean(typeof(Gfni).GetMethod(issupported).Invoke(null, null)) != Gfni.IsSupported || + Convert.ToBoolean(typeof(Gfni.V256).GetMethod(issupported).Invoke(null, null)) != Gfni.V256.IsSupported || + Convert.ToBoolean(typeof(Gfni.V512).GetMethod(issupported).Invoke(null, null)) != Gfni.V512.IsSupported || + Convert.ToBoolean(typeof(Gfni.X64).GetMethod(issupported).Invoke(null, null)) != Gfni.X64.IsSupported || Convert.ToBoolean(typeof(Pclmulqdq).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.IsSupported || Convert.ToBoolean(typeof(Pclmulqdq.V256).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.V256.IsSupported || Convert.ToBoolean(typeof(Pclmulqdq.V512).GetMethod(issupported).Invoke(null, null)) != Pclmulqdq.V512.IsSupported || diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_r.csproj new file mode 100644 index 0000000000000..1181a4692a6ab --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_r.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni.V256_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_ro.csproj new file mode 100644 index 0000000000000..6a7176a8cc25c --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Gfni.V256_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni.V256_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Program.Gfni.V256.cs b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Program.Gfni.V256.cs new file mode 100644 index 0000000000000..e96ee84db1957 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V256/Program.Gfni.V256.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/91392", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] +namespace JIT.HardwareIntrinsics.X86._Gfni.V256 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_r.csproj new file mode 100644 index 0000000000000..e7f35d018936b --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_r.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni.V512_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_ro.csproj new file mode 100644 index 0000000000000..bb2cbfdf77b31 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Gfni.V512_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni.V512_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Program.Gfni.V512.cs b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Program.Gfni.V512.cs new file mode 100644 index 0000000000000..8cf8a20f1c47a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni.V512/Program.Gfni.V512.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/91392", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] +namespace JIT.HardwareIntrinsics.X86._Gfni.V512 +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_r.csproj new file mode 100644 index 0000000000000..a582882424907 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_r.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_ro.csproj new file mode 100644 index 0000000000000..aaaf8b0bded09 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Gfni_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Gfni_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Program.Gfni.cs b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Program.Gfni.cs new file mode 100644 index 0000000000000..16721c947bbf2 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Gfni/Program.Gfni.cs @@ -0,0 +1,17 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +[assembly:Xunit.ActiveIssue("https://github.com/dotnet/runtime/issues/91392", typeof(TestLibrary.PlatformDetection), nameof(TestLibrary.PlatformDetection.IsMonoLLVMAOT))] +namespace JIT.HardwareIntrinsics.X86._Gfni +{ + public static partial class Program + { + static Program() + { + + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/GfniAffineTest.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/GfniAffineTest.template new file mode 100644 index 0000000000000..c46aa44c4aa31 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/GfniAffineTest.template @@ -0,0 +1,340 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using Xunit; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + [Fact] + public static void {Method}{RetBaseType}{Imm}() + { + var test = new GfniAffineTest__{Method}{RetBaseType}{Imm}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class GfniAffineTest__{Method}{RetBaseType}{Imm} + { + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op2VectorType}<{Op2BaseType}> _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(GfniAffineTest__{Method}{RetBaseType}{Imm} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2, {Imm}); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op2VectorType}<{Op2BaseType}> _fld2; + + private SimpleBinaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}> _dataTable; + + private static byte NextMatrixByte(int i) => ((ReadOnlySpan)[0x00, 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02])[i % 8]; + + public GfniAffineTest__{Method}{RetBaseType}{Imm}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + _dataTable = new SimpleBinaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}>(_data1, _data2, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = {Isa}.{Method}( + {LoadIsa}.LoadAligned{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.LoadAligned{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {Imm} + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op2VectorType}<{Op2BaseType}>), typeof(byte) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + (byte){Imm} + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var left = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var right = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr); + var result = {Isa}.{Method}(left, right, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(left, right, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2, {Imm}); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> left, {Op2VectorType}<{Op2BaseType}> right, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), left); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), right); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(left), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(right), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + +#pragma warning disable 8321 // emulation methods not used in all test forms + static {Op1BaseType} Invert({Op1BaseType} x) => ((ReadOnlySpan<{Op1BaseType}>)[ + 0x00, 0x01, 0x8D, 0xF6, 0xCB, 0x52, 0x7B, 0xD1, 0xE8, 0x4F, 0x29, 0xC0, 0xB0, 0xE1, 0xE5, 0xC7, + 0x74, 0xB4, 0xAA, 0x4B, 0x99, 0x2B, 0x60, 0x5F, 0x58, 0x3F, 0xFD, 0xCC, 0xFF, 0x40, 0xEE, 0xB2, + 0x3A, 0x6E, 0x5A, 0xF1, 0x55, 0x4D, 0xA8, 0xC9, 0xC1, 0x0A, 0x98, 0x15, 0x30, 0x44, 0xA2, 0xC2, + 0x2C, 0x45, 0x92, 0x6C, 0xF3, 0x39, 0x66, 0x42, 0xF2, 0x35, 0x20, 0x6F, 0x77, 0xBB, 0x59, 0x19, + 0x1D, 0xFE, 0x37, 0x67, 0x2D, 0x31, 0xF5, 0x69, 0xA7, 0x64, 0xAB, 0x13, 0x54, 0x25, 0xE9, 0x09, + 0xED, 0x5C, 0x05, 0xCA, 0x4C, 0x24, 0x87, 0xBF, 0x18, 0x3E, 0x22, 0xF0, 0x51, 0xEC, 0x61, 0x17, + 0x16, 0x5E, 0xAF, 0xD3, 0x49, 0xA6, 0x36, 0x43, 0xF4, 0x47, 0x91, 0xDF, 0x33, 0x93, 0x21, 0x3B, + 0x79, 0xB7, 0x97, 0x85, 0x10, 0xB5, 0xBA, 0x3C, 0xB6, 0x70, 0xD0, 0x06, 0xA1, 0xFA, 0x81, 0x82, + 0x83, 0x7E, 0x7F, 0x80, 0x96, 0x73, 0xBE, 0x56, 0x9B, 0x9E, 0x95, 0xD9, 0xF7, 0x02, 0xB9, 0xA4, + 0xDE, 0x6A, 0x32, 0x6D, 0xD8, 0x8A, 0x84, 0x72, 0x2A, 0x14, 0x9F, 0x88, 0xF9, 0xDC, 0x89, 0x9A, + 0xFB, 0x7C, 0x2E, 0xC3, 0x8F, 0xB8, 0x65, 0x48, 0x26, 0xC8, 0x12, 0x4A, 0xCE, 0xE7, 0xD2, 0x62, + 0x0C, 0xE0, 0x1F, 0xEF, 0x11, 0x75, 0x78, 0x71, 0xA5, 0x8E, 0x76, 0x3D, 0xBD, 0xBC, 0x86, 0x57, + 0x0B, 0x28, 0x2F, 0xA3, 0xDA, 0xD4, 0xE4, 0x0F, 0xA9, 0x27, 0x53, 0x04, 0x1B, 0xFC, 0xAC, 0xE6, + 0x7A, 0x07, 0xAE, 0x63, 0xC5, 0xDB, 0xE2, 0xEA, 0x94, 0x8B, 0xC4, 0xD5, 0x9D, 0xF8, 0x90, 0x6B, + 0xB1, 0x0D, 0xD6, 0xEB, 0xC6, 0x0E, 0xCF, 0xAD, 0x08, 0x4E, 0xD7, 0xE3, 0x5D, 0x50, 0x1E, 0xB3, + 0x5B, 0x23, 0x38, 0x34, 0x68, 0x46, 0x03, 0x8C, 0xDD, 0x9C, 0x7D, 0xA0, 0xCD, 0x1A, 0x41, 0x1C + ])[x]; + + static {RetBaseType} AffineTransform({Op1BaseType} x, ReadOnlySpan<{Op2BaseType}> A, byte b) + { + int ret = 0; + for (int i = 0; i < 8; i++) + { + ret |= (int.PopCount(A[7 - i] & x) & 1) << i; + } + + return ({RetBaseType})(ret ^ b); + } +#pragma warning restore 8321 + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>.{Imm}, {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index 48c3421ac83e6..d7c3e1aee3af8 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -108,6 +108,8 @@ public unsafe static void CpuId() testResult = Fail; } + bool isSse41HierarchyDisabled = isHierarchyDisabled; + if (IsBitIncorrect(ecx, 20, typeof(Sse42), Sse42.IsSupported, "SSE42", ref isHierarchyDisabled)) { testResult = Fail; @@ -284,6 +286,27 @@ public unsafe static void CpuId() testResult = Fail; } + isHierarchyDisabled = isSse41HierarchyDisabled; + + if (IsBitIncorrect(ecx, 8, typeof(Gfni), Gfni.IsSupported, "GFNI", ref isHierarchyDisabled)) + { + testResult = Fail; + } + + isHierarchyDisabled = isAvxHierarchyDisabled; + + if (IsBitIncorrect(ecx, 8, typeof(Gfni.V256), Gfni.V256.IsSupported, "GFNI", ref isHierarchyDisabled)) + { + testResult = Fail; + } + + isHierarchyDisabled = isAvx512HierarchyDisabled; + + if (IsBitIncorrect(ecx, 8, typeof(Gfni.V512), Gfni.V512.IsSupported, "GFNI", ref isHierarchyDisabled)) + { + testResult = Fail; + } + isHierarchyDisabled = isAvxHierarchyDisabled; if (IsBitIncorrect(ecx, 10, typeof(Pclmulqdq.V256), Pclmulqdq.V256.IsSupported, "VPCLMULQDQ", ref isHierarchyDisabled)) diff --git a/src/tests/issues.targets b/src/tests/issues.targets index 5cc2e2c985a0b..a2de34ea0788e 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -2257,6 +2257,15 @@ https://github.com/dotnet/runtime/issues/75767 + + https://github.com/dotnet/runtime/issues/91392 + + + https://github.com/dotnet/runtime/issues/91392 + + + https://github.com/dotnet/runtime/issues/91392 + https://github.com/dotnet/runtime/issues/75767 diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs index 7d891dbac1a41..8ec15aed2ff2f 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/Program.cs @@ -71,6 +71,9 @@ static int Main() bool? ExpectedAvx512DQ = false; bool? ExpectedAvx512Vbmi = false; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = false; + bool? ExpectedGfniV512 = false; #elif SSE42_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -96,6 +99,9 @@ static int Main() bool? ExpectedAvx512DQ = false; bool? ExpectedAvx512Vbmi = false; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = false; + bool? ExpectedGfniV512 = false; #elif AVX_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -121,6 +127,9 @@ static int Main() bool? ExpectedAvx512DQ = false; bool? ExpectedAvx512Vbmi = false; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = null; + bool? ExpectedGfniV512 = false; #elif AVX2_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -146,6 +155,9 @@ static int Main() bool? ExpectedAvx512DQ = false; bool? ExpectedAvx512Vbmi = false; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = null; + bool? ExpectedGfniV512 = false; #elif AVX512_INTRINSICS bool? ExpectedSse3 = true; bool? ExpectedSsse3 = true; @@ -171,6 +183,9 @@ static int Main() bool? ExpectedAvx512DQ = true; bool? ExpectedAvx512Vbmi = null; bool? ExpectedX86Serialize = null; + bool? ExpectedGfni = null; + bool? ExpectedGfniV256 = null; + bool? ExpectedGfniV512 = null; #else #error Who dis? #endif @@ -272,6 +287,11 @@ static int Main() Check("X86Serialize", ExpectedX86Serialize, &X86SerializeIsSupported, X86Serialize.IsSupported, () => { X86Serialize.Serialize(); return true; } ); Check("X86Serialize.X64", ExpectedX86Serialize, &X86SerializeX64IsSupported, X86Serialize.X64.IsSupported, null); + Check("Gfni", ExpectedGfni, &GfniIsSupported, Gfni.IsSupported, () => Gfni.GaloisFieldMultiply(Vector128.Zero, Vector128.Zero).Equals(Vector128.Zero)); + Check("Gfni.V256", ExpectedGfniV256, &GfniV256IsSupported, Gfni.V256.IsSupported, () => Gfni.V256.GaloisFieldMultiply(Vector256.Zero, Vector256.Zero).Equals(Vector256.Zero)); + Check("Gfni.V512", ExpectedGfniV512, &GfniV512IsSupported, Gfni.V512.IsSupported, () => Gfni.V512.GaloisFieldMultiply(Vector512.Zero, Vector512.Zero).Equals(Vector512.Zero)); + Check("Gfni.X64", ExpectedGfni, &GfniX64IsSupported, Gfni.X64.IsSupported, null); + return s_success ? 100 : 1; } @@ -333,6 +353,10 @@ static int Main() static bool Avx512VbmiX64IsSupported() => Avx512Vbmi.X64.IsSupported; static bool X86SerializeIsSupported() => X86Serialize.IsSupported; static bool X86SerializeX64IsSupported() => X86Serialize.X64.IsSupported; + static bool GfniIsSupported() => Gfni.IsSupported; + static bool GfniV256IsSupported() => Gfni.V256.IsSupported; + static bool GfniV512IsSupported() => Gfni.V512.IsSupported; + static bool GfniX64IsSupported() => Gfni.X64.IsSupported; static bool IsConstantTrue(delegate* code) {