Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JIT] [APX] Enable additional General Purpose Registers. #108799

Merged
merged 13 commits into from
Feb 7, 2025
Merged
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE41, W("EnableSSE41")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE42, W("EnableSSE42"), 1, "Allows SSE4.2+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSSE3, W("EnableSSSE3"), 1, "Allows SSSE3+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableX86Serialize, W("EnableX86Serialize"), 1, "Allows X86Serialize+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAPX, W("EnableAPX"), 0, "Allows APX+ features to be disabled")
#elif defined(TARGET_ARM64)
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64AdvSimd, W("EnableArm64AdvSimd"), 1, "Allows Arm64 AdvSimd+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Aes, W("EnableArm64Aes"), 1, "Allows Arm64 Aes+ hardware intrinsics to be disabled")
Expand Down
14 changes: 10 additions & 4 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,9 @@ void CodeGenInterface::CopyRegisterInfo()
#if defined(TARGET_AMD64)
rbmAllFloat = compiler->rbmAllFloat;
rbmFltCalleeTrash = compiler->rbmFltCalleeTrash;
rbmAllInt = compiler->rbmAllInt;
rbmIntCalleeTrash = compiler->rbmIntCalleeTrash;
regIntLast = compiler->regIntLast;
#endif // TARGET_AMD64

rbmAllMask = compiler->rbmAllMask;
Expand Down Expand Up @@ -5356,6 +5359,10 @@ void CodeGen::genFnProlog()
// will be skipped.
bool initRegZeroed = false;
regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
#if defined(TARGET_AMD64)
// TODO-Xarch-apx : Revert. Excluding eGPR so that it's not used for non REX2 supported movs.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For every instruction that uses eGPRs to have access to them, we need both REX2 and eEVEX encoding in the backend. Currently, we have added only REX2 (Ruihan will be adding support for eEVEX in the near future).

Adding eGPRs to the excludeMask essentially makes sure that we do not allocate eGPRs in a few cases for now. Once eGPR encoding is enabled for all instructions, this change needs to be removed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you okay with keeping this or would you prefer I add some additional comments here?

excludeMask = excludeMask | RBM_HIGHINT;
#endif // !defined(TARGET_AMD64)

#ifdef TARGET_ARM
// If we have a variable sized frame (compLocallocUsed is true)
Expand Down Expand Up @@ -5765,7 +5772,7 @@ void CodeGen::genFnProlog()

if (initRegs)
{
for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
for (regNumber reg = REG_INT_FIRST; reg <= get_REG_INT_LAST(); reg = REG_NEXT(reg))
{
regMaskTP regMask = genRegMask(reg);
if (regMask & initRegs)
Expand Down Expand Up @@ -6307,8 +6314,7 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP*
noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));

regMaskTP pushedRegs = regs;

for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
for (regNumber reg = REG_INT_FIRST; reg <= get_REG_INT_LAST(); reg = REG_NEXT(reg))
{
regMaskTP regMask = genRegMask(reg);

Expand Down Expand Up @@ -6380,7 +6386,7 @@ void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefReg
regMaskTP popedRegs = regs;

// Walk the registers in the reverse order as genPushRegs()
for (regNumber reg = REG_INT_LAST; reg >= REG_INT_LAST; reg = REG_PREV(reg))
for (regNumber reg = get_REG_INT_LAST(); reg >= REG_INT_FIRST; reg = REG_PREV(reg))
{
regMaskTP regMask = genRegMask(reg);

Expand Down
20 changes: 20 additions & 0 deletions src/coreclr/jit/codegeninterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,16 +76,36 @@ class CodeGenInterface

#if defined(TARGET_AMD64)
regMaskTP rbmAllFloat;
regMaskTP rbmAllInt;
regMaskTP rbmFltCalleeTrash;
regMaskTP rbmIntCalleeTrash;
regNumber regIntLast;

// Accessor for the rbmAllFloat register mask, which on TARGET_AMD64 is stored
// as a data member of this object.
FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const
{
    return rbmAllFloat;
}
// Accessor for the rbmAllInt register mask, which on TARGET_AMD64 is stored
// as a data member of this object.
FORCEINLINE regMaskTP get_RBM_ALLINT() const
{
    return rbmAllInt;
}
// Accessor for the rbmFltCalleeTrash register mask, which on TARGET_AMD64 is
// stored as a data member of this object.
FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const
{
    return rbmFltCalleeTrash;
}
// Accessor for the rbmIntCalleeTrash register mask, which on TARGET_AMD64 is
// stored as a data member of this object.
FORCEINLINE regMaskTP get_RBM_INT_CALLEE_TRASH() const
{
    return rbmIntCalleeTrash;
}
// TARGET_AMD64 variant: the last integer register is stored in regIntLast
// rather than taken from the REG_INT_LAST constant.
FORCEINLINE regNumber get_REG_INT_LAST() const
{
    return regIntLast;
}
#else
// Non-AMD64 variant: forwards the REG_INT_LAST constant so callers can use the
// same accessor on every target.
FORCEINLINE regNumber get_REG_INT_LAST() const
{
    const regNumber lastIntReg = REG_INT_LAST;
    return lastIntReg;
}
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
Expand Down
7 changes: 4 additions & 3 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1799,6 +1799,7 @@ void CodeGen::genCodeForReturnTrap(GenTreeOp* tree)

// emit the call to the EE-helper that stops for GC (or other reasons)
regNumber tmpReg = internalRegisters.GetSingle(tree, RBM_ALLINT);

assert(genIsValidIntReg(tmpReg));

genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
Expand Down Expand Up @@ -9999,7 +10000,7 @@ void CodeGen::genOSRRecordTier0CalleeSavedRegistersAndFrame()

// Now the rest of the Tier0 callee saves.
//
for (regNumber reg = REG_INT_LAST; tier0IntCalleeSaves != RBM_NONE; reg = REG_PREV(reg))
for (regNumber reg = get_REG_INT_LAST(); tier0IntCalleeSaves != RBM_NONE; reg = REG_PREV(reg))
{
regMaskTP regBit = genRegMask(reg);

Expand Down Expand Up @@ -10090,7 +10091,7 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()

// The OSR method must use MOVs to save additional callee saves.
//
for (regNumber reg = REG_INT_LAST; osrAdditionalIntCalleeSaves != RBM_NONE; reg = REG_PREV(reg))
for (regNumber reg = get_REG_INT_LAST(); osrAdditionalIntCalleeSaves != RBM_NONE; reg = REG_PREV(reg))
{
regMaskTP regBit = genRegMask(reg);

Expand Down Expand Up @@ -10155,7 +10156,7 @@ void CodeGen::genPushCalleeSavedRegisters()

// Push backwards so we match the order we will pop them in the epilog
// and all the other code that expects it to be in this order.
for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
for (regNumber reg = get_REG_INT_LAST(); rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
{
regMaskTP regBit = genRegMask(reg);

Expand Down
18 changes: 18 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3391,12 +3391,25 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT;
cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT;

rbmAllInt = RBM_ALLINT_INIT;
rbmIntCalleeTrash = RBM_INT_CALLEE_TRASH_INIT;
cntCalleeTrashInt = CNT_CALLEE_TRASH_INT_INIT;
regIntLast = REG_R15;

if (canUseEvexEncoding())
{
rbmAllFloat |= RBM_HIGHFLOAT;
rbmFltCalleeTrash |= RBM_HIGHFLOAT;
cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT;
}

if (canUseApxEncoding())
{
rbmAllInt |= RBM_HIGHINT;
rbmIntCalleeTrash |= RBM_HIGHINT;
cntCalleeTrashInt += CNT_CALLEE_TRASH_HIGHINT;
regIntLast = REG_R23;
}
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
Expand Down Expand Up @@ -6333,6 +6346,11 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
instructionSetFlags.AddInstructionSet(InstructionSet_AVX10v1_V512);
instructionSetFlags.AddInstructionSet(InstructionSet_EVEX);
}

if (JitConfig.EnableAPX() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_APX);
}
#endif

// These calls are important and explicitly ordered to ensure that the flags are correct in
Expand Down
31 changes: 31 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -10372,8 +10372,14 @@ class Compiler
// On these platforms we assume the register that the target is
// passed in is preserved by the validator and take care to get the
// target from the register for the call (even in debug mode).
// RBM_INT_CALLEE_TRASH is not known at compile time on TARGET_AMD64 since it's dependent on APX support.
#if defined(TARGET_AMD64)
static_assert_no_msg(
(RBM_VALIDATE_INDIRECT_CALL_TRASH_ALL & regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == RBM_NONE);
#else
static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) ==
RBM_NONE);
#endif
if (JitConfig.JitForceControlFlowGuard())
return true;

Expand Down Expand Up @@ -11810,6 +11816,10 @@ class Compiler
regMaskTP rbmFltCalleeTrash;
unsigned cntCalleeTrashFloat;

regMaskTP rbmAllInt;
regMaskTP rbmIntCalleeTrash;
unsigned cntCalleeTrashInt;
regNumber regIntLast;
public:
FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const
{
Expand All @@ -11824,6 +11834,27 @@ class Compiler
return this->cntCalleeTrashFloat;
}

// Accessor for rbmAllInt. compInitOptions seeds it from RBM_ALLINT_INIT and
// ORs in RBM_HIGHINT when canUseApxEncoding() is true.
FORCEINLINE regMaskTP get_RBM_ALLINT() const
{
    return rbmAllInt;
}
// Accessor for rbmIntCalleeTrash. compInitOptions seeds it from
// RBM_INT_CALLEE_TRASH_INIT and ORs in RBM_HIGHINT when canUseApxEncoding() is true.
FORCEINLINE regMaskTP get_RBM_INT_CALLEE_TRASH() const
{
    return rbmIntCalleeTrash;
}
// Accessor for cntCalleeTrashInt. compInitOptions seeds it from
// CNT_CALLEE_TRASH_INT_INIT and adds CNT_CALLEE_TRASH_HIGHINT when
// canUseApxEncoding() is true.
FORCEINLINE unsigned get_CNT_CALLEE_TRASH_INT() const
{
    return cntCalleeTrashInt;
}
// TARGET_AMD64 variant: returns regIntLast, which compInitOptions sets to
// REG_R15 and raises to REG_R23 when canUseApxEncoding() is true.
FORCEINLINE regNumber get_REG_INT_LAST() const
{
    return regIntLast;
}
#else
// Non-AMD64 variant: forwards the REG_INT_LAST constant so callers can use the
// same accessor on every target.
FORCEINLINE regNumber get_REG_INT_LAST() const
{
    const regNumber lastIntReg = REG_INT_LAST;
    return lastIntReg;
}
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
Expand Down
12 changes: 11 additions & 1 deletion src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4639,15 +4639,25 @@ inline char* regMaskToString(regMaskTP mask, Compiler* context)

inline void printRegMaskInt(regMaskTP mask)
{
    // Prints the low part of `mask`, restricted to the integer registers, using
    // REG_MASK_INT_FMT.
    //
    // On TARGET_AMD64, RBM_ALLINT is not known at compile time because it depends
    // on APX support, so the RBM_ALLINT_ALL mask is used there instead. (The
    // original note states these are used by GC exclusively.)
#if defined(TARGET_AMD64)
    const auto intRegBits = (mask & RBM_ALLINT_ALL).getLow();
#else  // !TARGET_AMD64
    const auto intRegBits = (mask & RBM_ALLINT).getLow();
#endif // !TARGET_AMD64

    printf(REG_MASK_INT_FMT, intRegBits);
}

inline char* regMaskIntToString(regMaskTP mask, Compiler* context)
{
    // Formats the low part of `mask`, restricted to the integer registers, with
    // REG_MASK_INT_FMT into a 24-character buffer allocated through `context`
    // (CMK_Unknown), and returns that buffer.
    //
    // NOTE(review): the original comment asked whether this helper is
    // deprecated - confirm before relying on it.
    const size_t bufferLen = 24;
    char* const  text      = new (context, CMK_Unknown) char[bufferLen];

    // On TARGET_AMD64, RBM_ALLINT is not known at compile time because it depends
    // on APX support, so the RBM_ALLINT_ALL mask is used there instead.
#if defined(TARGET_AMD64)
    const auto intRegBits = (mask & RBM_ALLINT_ALL).getLow();
#else  // !TARGET_AMD64
    const auto intRegBits = (mask & RBM_ALLINT).getLow();
#endif // !TARGET_AMD64

    sprintf_s(text, bufferLen, REG_MASK_INT_FMT, intRegBits);
    return text;
}
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,8 @@ void emitter::emitBegCG(Compiler* comp, COMP_HANDLE cmpHandle)

#if defined(TARGET_AMD64)
rbmFltCalleeTrash = emitComp->rbmFltCalleeTrash;
rbmIntCalleeTrash = emitComp->rbmIntCalleeTrash;
rbmAllInt = emitComp->rbmAllInt;
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -2527,11 +2527,24 @@ class emitter
private:
#if defined(TARGET_AMD64)
regMaskTP rbmFltCalleeTrash;
regMaskTP rbmAllInt;

// Accessor for the emitter's rbmFltCalleeTrash mask, which emitBegCG copies
// from the Compiler instance.
FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const
{
    return rbmFltCalleeTrash;
}

regMaskTP rbmIntCalleeTrash;

// Accessor for the emitter's rbmIntCalleeTrash mask, which emitBegCG copies
// from the Compiler instance.
FORCEINLINE regMaskTP get_RBM_INT_CALLEE_TRASH() const
{
    return rbmIntCalleeTrash;
}

// Accessor for the emitter's rbmAllInt mask, which emitBegCG copies from the
// Compiler instance.
FORCEINLINE regMaskTP get_RBM_ALLINT() const
{
    return rbmAllInt;
}
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
Expand Down
Loading
Loading