Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GFNI Intrinsics #109537

Merged
merged 8 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"), 1, "Allows BMI2+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableGFNI, W("EnableGFNI"), 1, "Allows GFNI+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1, "Allows VPCLMULQDQ+ hardware intrinsics to be disabled")
Expand Down
162 changes: 108 additions & 54 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,33 +86,37 @@ enum CORINFO_InstructionSet
InstructionSet_APX=41,
InstructionSet_AVX10v2=42,
InstructionSet_AVX10v2_V512=43,
InstructionSet_X86Base_X64=44,
InstructionSet_SSE_X64=45,
InstructionSet_SSE2_X64=46,
InstructionSet_SSE3_X64=47,
InstructionSet_SSSE3_X64=48,
InstructionSet_SSE41_X64=49,
InstructionSet_SSE42_X64=50,
InstructionSet_AVX_X64=51,
InstructionSet_AVX2_X64=52,
InstructionSet_AES_X64=53,
InstructionSet_BMI1_X64=54,
InstructionSet_BMI2_X64=55,
InstructionSet_FMA_X64=56,
InstructionSet_LZCNT_X64=57,
InstructionSet_PCLMULQDQ_X64=58,
InstructionSet_POPCNT_X64=59,
InstructionSet_AVXVNNI_X64=60,
InstructionSet_X86Serialize_X64=61,
InstructionSet_AVX512F_X64=62,
InstructionSet_AVX512BW_X64=63,
InstructionSet_AVX512CD_X64=64,
InstructionSet_AVX512DQ_X64=65,
InstructionSet_AVX512VBMI_X64=66,
InstructionSet_AVX10v1_X64=67,
InstructionSet_AVX10v1_V512_X64=68,
InstructionSet_AVX10v2_X64=69,
InstructionSet_AVX10v2_V512_X64=70,
InstructionSet_GFNI=44,
InstructionSet_GFNI_V256=45,
InstructionSet_GFNI_V512=46,
InstructionSet_X86Base_X64=47,
InstructionSet_SSE_X64=48,
InstructionSet_SSE2_X64=49,
InstructionSet_SSE3_X64=50,
InstructionSet_SSSE3_X64=51,
InstructionSet_SSE41_X64=52,
InstructionSet_SSE42_X64=53,
InstructionSet_AVX_X64=54,
InstructionSet_AVX2_X64=55,
InstructionSet_AES_X64=56,
InstructionSet_BMI1_X64=57,
InstructionSet_BMI2_X64=58,
InstructionSet_FMA_X64=59,
InstructionSet_LZCNT_X64=60,
InstructionSet_PCLMULQDQ_X64=61,
InstructionSet_POPCNT_X64=62,
InstructionSet_AVXVNNI_X64=63,
InstructionSet_X86Serialize_X64=64,
InstructionSet_AVX512F_X64=65,
InstructionSet_AVX512BW_X64=66,
InstructionSet_AVX512CD_X64=67,
InstructionSet_AVX512DQ_X64=68,
InstructionSet_AVX512VBMI_X64=69,
InstructionSet_AVX10v1_X64=70,
InstructionSet_AVX10v1_V512_X64=71,
InstructionSet_AVX10v2_X64=72,
InstructionSet_AVX10v2_V512_X64=73,
InstructionSet_GFNI_X64=74,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
Expand Down Expand Up @@ -158,33 +162,37 @@ enum CORINFO_InstructionSet
InstructionSet_APX=41,
InstructionSet_AVX10v2=42,
InstructionSet_AVX10v2_V512=43,
InstructionSet_X86Base_X64=44,
InstructionSet_SSE_X64=45,
InstructionSet_SSE2_X64=46,
InstructionSet_SSE3_X64=47,
InstructionSet_SSSE3_X64=48,
InstructionSet_SSE41_X64=49,
InstructionSet_SSE42_X64=50,
InstructionSet_AVX_X64=51,
InstructionSet_AVX2_X64=52,
InstructionSet_AES_X64=53,
InstructionSet_BMI1_X64=54,
InstructionSet_BMI2_X64=55,
InstructionSet_FMA_X64=56,
InstructionSet_LZCNT_X64=57,
InstructionSet_PCLMULQDQ_X64=58,
InstructionSet_POPCNT_X64=59,
InstructionSet_AVXVNNI_X64=60,
InstructionSet_X86Serialize_X64=61,
InstructionSet_AVX512F_X64=62,
InstructionSet_AVX512BW_X64=63,
InstructionSet_AVX512CD_X64=64,
InstructionSet_AVX512DQ_X64=65,
InstructionSet_AVX512VBMI_X64=66,
InstructionSet_AVX10v1_X64=67,
InstructionSet_AVX10v1_V512_X64=68,
InstructionSet_AVX10v2_X64=69,
InstructionSet_AVX10v2_V512_X64=70,
InstructionSet_GFNI=44,
InstructionSet_GFNI_V256=45,
InstructionSet_GFNI_V512=46,
InstructionSet_X86Base_X64=47,
InstructionSet_SSE_X64=48,
InstructionSet_SSE2_X64=49,
InstructionSet_SSE3_X64=50,
InstructionSet_SSSE3_X64=51,
InstructionSet_SSE41_X64=52,
InstructionSet_SSE42_X64=53,
InstructionSet_AVX_X64=54,
InstructionSet_AVX2_X64=55,
InstructionSet_AES_X64=56,
InstructionSet_BMI1_X64=57,
InstructionSet_BMI2_X64=58,
InstructionSet_FMA_X64=59,
InstructionSet_LZCNT_X64=60,
InstructionSet_PCLMULQDQ_X64=61,
InstructionSet_POPCNT_X64=62,
InstructionSet_AVXVNNI_X64=63,
InstructionSet_X86Serialize_X64=64,
InstructionSet_AVX512F_X64=65,
InstructionSet_AVX512BW_X64=66,
InstructionSet_AVX512CD_X64=67,
InstructionSet_AVX512DQ_X64=68,
InstructionSet_AVX512VBMI_X64=69,
InstructionSet_AVX10v1_X64=70,
InstructionSet_AVX10v1_V512_X64=71,
InstructionSet_AVX10v2_X64=72,
InstructionSet_AVX10v2_V512_X64=73,
InstructionSet_GFNI_X64=74,
#endif // TARGET_X86

};
Expand Down Expand Up @@ -354,6 +362,8 @@ struct CORINFO_InstructionSetFlags
AddInstructionSet(InstructionSet_AVX10v2_X64);
if (HasInstructionSet(InstructionSet_AVX10v2_V512))
AddInstructionSet(InstructionSet_AVX10v2_V512_X64);
if (HasInstructionSet(InstructionSet_GFNI))
AddInstructionSet(InstructionSet_GFNI_X64);
#endif // TARGET_AMD64
#ifdef TARGET_X86
#endif // TARGET_X86
Expand Down Expand Up @@ -542,6 +552,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512))
resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512_X64);
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_GFNI_X64))
resultflags.RemoveInstructionSet(InstructionSet_GFNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_X64) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
Expand Down Expand Up @@ -618,6 +632,16 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41))
resultflags.RemoveInstructionSet(InstructionSet_GFNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX))
resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1))
Expand Down Expand Up @@ -736,6 +760,16 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_SSE41))
resultflags.RemoveInstructionSet(InstructionSet_GFNI);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512);
if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX))
resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1))
Expand Down Expand Up @@ -984,6 +1018,14 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVX10v2_V512";
case InstructionSet_AVX10v2_V512_X64 :
return "AVX10v2_V512_X64";
case InstructionSet_GFNI :
return "GFNI";
case InstructionSet_GFNI_X64 :
return "GFNI_X64";
case InstructionSet_GFNI_V256 :
return "GFNI_V256";
case InstructionSet_GFNI_V512 :
return "GFNI_V512";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
Expand Down Expand Up @@ -1072,6 +1114,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVX10v2";
case InstructionSet_AVX10v2_V512 :
return "AVX10v2_V512";
case InstructionSet_GFNI :
return "GFNI";
case InstructionSet_GFNI_V256 :
return "GFNI_V256";
case InstructionSet_GFNI_V512 :
return "GFNI_V512";
#endif // TARGET_X86

default:
Expand Down Expand Up @@ -1147,6 +1195,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2;
case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512;
case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI;
case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256;
case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand Down Expand Up @@ -1189,6 +1240,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX;
case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2;
case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512;
case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI;
case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256;
case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512;
#endif // TARGET_X86

default:
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 9ed85c09-d33d-4855-80ea-e3b7330e8173 */
0x9ed85c09,
0xd33d,
0x4855,
{0x80, 0xea, 0xe3, 0xb7, 0x33, 0x0e, 0x81, 0x73}
// The GUID spelled in the inline comment and the four initializer fields below encode the
// same value (64146448-11b1-4f94-b1f2-edce91fbcb33); keep the two in sync when regenerating.
// NOTE(review): presumably this must be regenerated whenever the JIT-EE interface changes
// (this change adds new instruction-set enum values) -- confirm against the header's own notes.
constexpr GUID JITEEVersionIdentifier = { /* 64146448-11b1-4f94-b1f2-edce91fbcb33 */
0x64146448,
0x11b1,
0x4f94,
{0xb1, 0xf2, 0xed, 0xce, 0x91, 0xfb, 0xcb, 0x33}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Pclmulqdq_V512=50,
READYTORUN_INSTRUCTION_Avx10v2=51,
READYTORUN_INSTRUCTION_Avx10v2_V512=52,
READYTORUN_INSTRUCTION_Gfni=53,
READYTORUN_INSTRUCTION_Gfni_V256=54,
READYTORUN_INSTRUCTION_Gfni_V512=55,

};

Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6162,6 +6162,13 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
instructionSetFlags.AddInstructionSet(InstructionSet_FMA);
}

// A single EnableGFNI config value gates all three GFNI instruction sets: the base set
// plus the V256 and V512 variants are added together, or not at all.
// NOTE(review): this only requests the sets; whether each survives presumably depends on
// later validation of its prerequisite sets -- confirm in EnsureInstructionSetFlagsAreValid.
if (JitConfig.EnableGFNI() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_GFNI);
instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V256);
instructionSetFlags.AddInstructionSet(InstructionSet_GFNI_V512);
}

if (JitConfig.EnableLZCNT() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_LZCNT);
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1599,6 +1599,12 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
return false;
}

// GFNI affine instructions: the answer is delegated to TakesVexPrefix(ins), so these
// report a W-prefix requirement exactly when the instruction takes a VEX prefix.
// NOTE(review): INS_gf2p8mulb is not listed in this case group -- confirm that is intentional.
case INS_gf2p8affineinvqb:
case INS_gf2p8affineqb:
{
return TakesVexPrefix(ins);
}

default:
{
unreached();
Expand Down Expand Up @@ -19836,6 +19842,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vpdpwssd:
case INS_vpdpbusds:
case INS_vpdpwssds:
case INS_gf2p8affineinvqb:
case INS_gf2p8affineqb:
case INS_gf2p8mulb:
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += PERFSCORE_LATENCY_5C;
break;
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,9 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // APX
{ NI_Illegal, NI_Illegal }, // AVX10v2
{ NI_Illegal, NI_Illegal }, // AVX10v2_V512
{ FIRST_NI_GFNI, LAST_NI_GFNI },
{ FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 },
{ FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 },
{ FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 },
{ FIRST_NI_SSE_X64, LAST_NI_SSE_X64 },
{ FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 },
Expand Down Expand Up @@ -836,6 +839,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // AVX10v1_V512_X64
{ NI_Illegal, NI_Illegal }, // AVX10v2_X64
{ NI_Illegal, NI_Illegal }, // AVX10v2_V512_X64
{ NI_Illegal, NI_Illegal }, // GFNI_X64
#elif defined (TARGET_ARM64)
{ FIRST_NI_ArmBase, LAST_NI_ArmBase },
{ FIRST_NI_AdvSimd, LAST_NI_AdvSimd },
Expand Down
Loading
Loading