From a74067170f7c24b81c5b3794a6f3758f1aacc05b Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Tue, 20 Mar 2018 20:55:07 -0700 Subject: [PATCH] Tighten arm32/arm64 write barrier kill reg sets The JIT write barrier helpers have a custom calling convention that avoids killing most registers. The JIT was not taking advantage of this, and thus was killing unnecessary registers when a write barrier was necessary. In particular, some integer callee-trash registers are unaffected by the write barriers, and no floating-point register is affected. Also, I got rid of the `FEATURE_WRITE_BARRIER` define, which is always set. I also put some code under `LEGACY_BACKEND` for easier cleanup later. I removed some unused defines in target.h for some platforms. --- src/jit/codegenarm64.cpp | 37 ++-------- src/jit/codegencommon.cpp | 35 ++++++--- src/jit/codegenlegacy.cpp | 26 +------ src/jit/emit.cpp | 5 +- src/jit/emitarm.cpp | 8 +- src/jit/emitarm64.cpp | 10 +++ src/jit/gcinfo.cpp | 3 - src/jit/gentree.cpp | 2 +- src/jit/importer.cpp | 6 +- src/jit/lsra.cpp | 6 +- src/jit/lsrabuild.cpp | 24 +++--- src/jit/optimizer.cpp | 2 +- src/jit/regset.cpp | 2 + src/jit/target.h | 152 +++++++++++++++++++++++--------------- src/vm/arm/asmhelpers.asm | 13 ++-- 15 files changed, 166 insertions(+), 165 deletions(-) diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index efefbf10ad36..ade7328db675 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -2757,7 +2757,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) } //------------------------------------------------------------------------ -// genCodeForSwap: Produce code for a GT_CMPXCHG node. +// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. // // Arguments: // tree - the GT_CMPXCHG node @@ -3006,41 +3006,16 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) // registers are taken care of. genConsumeOperands(tree); -#if NOGC_WRITE_BARRIERS // At this point, we should not have any interference. // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF, // as that is where 'addr' must go. noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF); - // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) - if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF) - { - inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet()); - } - - // 'data' goes into x15 (REG_WRITE_BARRIER) - if (data->gtRegNum != REG_WRITE_BARRIER) - { - inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet()); - } -#else - // At this point, we should not have any interference. - // That is, 'data' must not be in REG_ARG_0, - // as that is where 'addr' must go. - noway_assert(data->gtRegNum != REG_ARG_0); + // 'addr' goes into x14 (REG_WRITE_BARRIER_DST) + genCopyRegIfNeeded(addr, REG_WRITE_BARRIER_DST); - // addr goes in REG_ARG_0 - if (addr->gtRegNum != REG_ARG_0) - { - inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); - } - - // data goes in REG_ARG_1 - if (data->gtRegNum != REG_ARG_1) - { - inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); - } -#endif // NOGC_WRITE_BARRIERS + // 'data' goes into x15 (REG_WRITE_BARRIER_SRC) + genCopyRegIfNeeded(data, REG_WRITE_BARRIER_SRC); genGCWriteBarrier(tree, writeBarrierForm); } @@ -3116,6 +3091,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) // void CodeGen::genCodeForSwap(GenTreeOp* tree) { + assert(tree->OperIs(GT_SWAP)); + // Swap is only supported for lclVar operands that are enregistered // We do not consume or produce any registers. Both operands remain enregistered. // However, the gc-ness may change. diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index c1bcff0eeb47..f5dd308f851e 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -635,7 +635,7 @@ void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bo // helper - The helper being inquired about // // Return Value: -// Mask of register kills -- registers whose value is no longer guaranteed to be the same. +// Mask of register kills -- registers whose values are no longer guaranteed to be the same. // regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) { @@ -645,7 +645,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) #if defined(_TARGET_AMD64_) return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH_NOGC; #elif defined(_TARGET_ARMARCH_) - return RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF | RBM_CALLEE_TRASH_NOGC; + return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF; #elif defined(_TARGET_X86_) return RBM_ESI | RBM_EDI | RBM_ECX; #else @@ -653,6 +653,12 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) return RBM_CALLEE_TRASH; #endif +#if defined(_TARGET_ARMARCH_) + case CORINFO_HELP_ASSIGN_REF: + case CORINFO_HELP_CHECKED_ASSIGN_REF: + return RBM_CALLEE_TRASH_WRITEBARRIER; +#endif + case CORINFO_HELP_PROF_FCN_ENTER: #ifdef RBM_PROFILER_ENTER_TRASH return RBM_PROFILER_ENTER_TRASH; @@ -742,15 +748,20 @@ regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper) return RBM_PROFILER_TAILCALL_TRASH; #endif // defined(_TARGET_XARCH_) +#if defined(_TARGET_X86_) case CORINFO_HELP_ASSIGN_BYREF: -#if defined(_TARGET_AMD64_) - return RBM_CALLEE_TRASH_NOGC; -#elif defined(_TARGET_X86_) // This helper only trashes ECX. return RBM_ECX; -#elif defined(_TARGET_ARMARCH_) - return RBM_CALLEE_TRASH_NOGC; -#endif // defined(_TARGET_AMD64_) +#endif // defined(_TARGET_X86_) + +#if defined(_TARGET_ARMARCH_) + case CORINFO_HELP_ASSIGN_BYREF: + return RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; + + case CORINFO_HELP_ASSIGN_REF: + case CORINFO_HELP_CHECKED_ASSIGN_REF: + return RBM_CALLEE_GCTRASH_WRITEBARRIER; +#endif default: return RBM_CALLEE_TRASH_NOGC; @@ -3969,8 +3980,8 @@ void CodeGen::genReportEH() // // Return Value: // true if an optimized write barrier helper should be used, false otherwise. -// Note: only x86 implements (register-specific source) optimized write -// barriers currently). +// Note: only x86 implements register-specific source optimized write +// barriers currently. // bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf) { @@ -3999,8 +4010,8 @@ bool CodeGenInterface::genUseOptimizedWriteBarriers(GCInfo::WriteBarrierForm wbf // // Return Value: // true if an optimized write barrier helper should be used, false otherwise. -// Note: only x86 implements (register-specific source) optimized write -// barriers currently). +// Note: only x86 implements register-specific source optimized write +// barriers currently. // bool CodeGenInterface::genUseOptimizedWriteBarriers(GenTree* tgt, GenTree* assignVal) { diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp index 648a48162df4..a2aeb5e779f7 100644 --- a/src/jit/codegenlegacy.cpp +++ b/src/jit/codegenlegacy.cpp @@ -2102,7 +2102,7 @@ regMaskTP CodeGen::genMakeAddrArrElem(GenTree* arrElem, GenTree* tree, regMaskTP regMaskTP addrReg = RBM_NONE; regMaskTP regNeed = RBM_ALLINT; -#if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS +#if !NOGC_WRITE_BARRIERS // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0 // since the arrObj participates in the lea/add instruction // that computes ARG_0 we should avoid putting it in ARG_1 @@ -3108,8 +3108,6 @@ regMaskTP CodeGen::WriteBarrier(GenTree* tgt, GenTree* assignVal, regMaskTP tgtA regMaskTP resultRegMask = RBM_NONE; -#if FEATURE_WRITE_BARRIER - regNumber reg = assignVal->gtRegNum; #if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS @@ -3340,13 +3338,6 @@ regMaskTP CodeGen::WriteBarrier(GenTree* tgt, GenTree* assignVal, regMaskTP tgtA return resultRegMask; } #endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS) - -#else // !FEATURE_WRITE_BARRIER - - NYI("FEATURE_WRITE_BARRIER unimplemented"); - return resultRegMask; - -#endif // !FEATURE_WRITE_BARRIER } #ifdef _TARGET_X86_ @@ -8877,8 +8868,7 @@ void CodeGen::genCodeForCopyObj(GenTree* tree, regMaskTP destReg) CorInfoGCType gcTypeNext = TYPE_GC_NONE; var_types type = TYP_I_IMPL; -#if FEATURE_WRITE_BARRIER - gcType = (CorInfoGCType)(*gcPtrs++); + gcType = (CorInfoGCType)(*gcPtrs++); if (blkSize > TARGET_POINTER_SIZE) gcTypeNext = (CorInfoGCType)(*gcPtrs); @@ -8893,9 +8883,6 @@ void CodeGen::genCodeForCopyObj(GenTree* tree, regMaskTP destReg) assert(regSrc == REG_ARG_1); assert(regTemp == REG_R2); } -#else - gcType = TYPE_GC_NONE; -#endif // FEATURE_WRITE_BARRIER blkSize -= TARGET_POINTER_SIZE; @@ -10427,14 +10414,7 @@ void CodeGen::genCodeForTreeSmpOp(GenTree* tree, regMaskTP destReg, regMaskTP be } return; -#else // !_TARGET_XARCH_ - - case GT_LOCKADD: - case GT_XADD: - case GT_XCHG: - - NYI_ARM("LOCK instructions"); -#endif +#endif // _TARGET_XARCH_ case GT_ARR_LENGTH: { diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index 7a1d106f1700..32524ea5ed06 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -2335,15 +2335,12 @@ bool emitter::emitNoGChelper(unsigned IHX) #endif case CORINFO_HELP_ASSIGN_REF: - case CORINFO_HELP_CHECKED_ASSIGN_REF: + case CORINFO_HELP_ASSIGN_BYREF: case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR: - case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR: - case CORINFO_HELP_ASSIGN_BYREF: - case CORINFO_HELP_INIT_PINVOKE_FRAME: return true; diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp index db6dbdd66aae..e5c98f946978 100644 --- a/src/jit/emitarm.cpp +++ b/src/jit/emitarm.cpp @@ -4467,12 +4467,8 @@ void emitter::emitIns_Call(EmitCallType callType, { assert(emitNoGChelper(Compiler::eeGetHelperNum(methHnd))); - // This call will preserve the liveness of most registers - // - // - On the ARM the NOGC helpers will preserve all registers, - // except for those listed in the RBM_CALLEE_TRASH_NOGC mask - - savedSet = RBM_ALLINT & ~RBM_CALLEE_TRASH_NOGC; + // Get the set of registers that this call kills and remove it from the saved set. + savedSet = RBM_ALLINT & ~emitComp->compNoGCHelperCallKillSet(Compiler::eeGetHelperNum(methHnd)); // In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH if (isProfLeaveCB) diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp index fe690d2abb80..dfca2cdd0069 100644 --- a/src/jit/emitarm64.cpp +++ b/src/jit/emitarm64.cpp @@ -7356,6 +7356,16 @@ void emitter::emitIns_Call(EmitCallType callType, { savedSet |= RBM_PROFILER_RET_SCRATCH; } + +#ifdef DEBUG + if (emitComp->verbose) + { + printf("NOGC Call: savedSet="); + printRegMaskInt(savedSet); + emitDispRegSet(savedSet); + printf("\n"); + } +#endif } else { diff --git a/src/jit/gcinfo.cpp b/src/jit/gcinfo.cpp index a91bef26295b..6ed3f6d0dfaf 100644 --- a/src/jit/gcinfo.cpp +++ b/src/jit/gcinfo.cpp @@ -231,8 +231,6 @@ void GCInfo::gcMarkRegPtrVal(regNumber reg, var_types type) GCInfo::WriteBarrierForm GCInfo::gcIsWriteBarrierCandidate(GenTree* tgt, GenTree* assignVal) { -#if FEATURE_WRITE_BARRIER - /* Are we storing a GC ptr? */ if (!varTypeIsGC(tgt->TypeGet())) @@ -292,7 +290,6 @@ GCInfo::WriteBarrierForm GCInfo::gcIsWriteBarrierCandidate(GenTree* tgt, GenTree } assert(!"Missing case in gcIsWriteBarrierCandidate"); -#endif return WBF_NoBarrier; } diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 10ef3738f174..4614df0e3796 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -7456,7 +7456,7 @@ GenTree* Compiler::gtNewPutArgReg(var_types type, GenTree* arg, regNumber argReg // Returns the newly created BitCast node. // // Notes: -// The node is generated as GenTreeMultiRegOp on RyuJIT/armel, as GenTreeOp on all the other archs. +// The node is generated as GenTreeMultiRegOp on RyuJIT/arm, as GenTreeOp on all the other archs. // GenTree* Compiler::gtNewBitCastNode(var_types type, GenTree* arg) { diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index e87065310db7..182a0498203c 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -19221,9 +19221,9 @@ bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId) bool Compiler::IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId) { - // Currently, if an math intrisic is not implemented by target-specific - // intructions, it will be implemented by a System.Math call. In the - // future, if we turn to implementing some of them with helper callers, + // Currently, if a math intrinsic is not implemented by target-specific + // instructions, it will be implemented by a System.Math call. In the + // future, if we turn to implementing some of them with helper calls, // this predicate needs to be revisited. return !IsTargetIntrinsic(intrinsicId); } diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 086be385666d..66a665b2b2ef 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -6377,14 +6377,14 @@ void LinearScan::insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned m } // If the parent is a reload/copy node, then tree must be a multi-reg call node - // that has already had one of its registers spilled. This is Because multi-reg + // that has already had one of its registers spilled. This is because multi-reg // call node is the only node whose RefTypeDef positions get independently // spilled or reloaded. It is possible that one of its RefTypeDef position got // spilled and the next use of it requires it to be in a different register. // - // In this case set the ith position reg of reload/copy node to the reg allocated + // In this case set the i'th position reg of reload/copy node to the reg allocated // for copy/reload refPosition. Essentially a copy/reload node will have a reg - // for each multi-reg position of its child. If there is a valid reg in ith + // for each multi-reg position of its child. If there is a valid reg in i'th // position of GT_COPY or GT_RELOAD node then the corresponding result of its // child needs to be copied or reloaded to that reg. if (parent->IsCopyOrReload()) diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp index 5023b13069ad..62692d587a24 100644 --- a/src/jit/lsrabuild.cpp +++ b/src/jit/lsrabuild.cpp @@ -664,7 +664,8 @@ regMaskTP LinearScan::getKillSetForStoreInd(GenTreeStoreInd* tree) { // We can't determine the exact helper to be used at this point, because it depends on // the allocated register for the `data` operand. However, all the (x86) optimized - // helpers have the same kill set: EDX. + // helpers have the same kill set: EDX. And note that currently, only x86 can return + // `true` for genUseOptimizedWriteBarriers(). killMask = RBM_CALLEE_TRASH_NOGC; } else @@ -3141,22 +3142,18 @@ void LinearScan::BuildGCWriteBarrier(GenTree* tree) assert(info->dstCount == 0); bool customSourceRegs = false; -#if NOGC_WRITE_BARRIERS - #if defined(_TARGET_ARM64_) - // For the NOGC JIT Helper calls - // - // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF) - // the 'src' goes into x15 (REG_WRITE_BARRIER) + + // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST) + // the 'src' goes into x15 (REG_WRITE_BARRIER_SRC) // - addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST_BYREF); - srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER); + addrInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_DST); + srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC); customSourceRegs = true; -#elif defined(_TARGET_X86_) +#elif defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS bool useOptimizedWriteBarrierHelper = compiler->codeGen->genUseOptimizedWriteBarriers(tree, src); - if (useOptimizedWriteBarrierHelper) { // Special write barrier: @@ -3166,11 +3163,8 @@ void LinearScan::BuildGCWriteBarrier(GenTree* tree) srcInfo->info.setSrcCandidates(this, RBM_WRITE_BARRIER_SRC); customSourceRegs = true; } -#else // !defined(_TARGET_X86_) && !defined(_TARGET_ARM64_) -#error "NOGC_WRITE_BARRIERS is not supported" -#endif // !defined(_TARGET_X86_) -#endif // NOGC_WRITE_BARRIERS +#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS if (!customSourceRegs) { diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index ea8f6a58d825..bd3d29650fea 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -5573,7 +5573,7 @@ void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight) return CALLINT_SCL_INDIRS; - case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32 + case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this case CORINFO_HELP_SETFIELDSTRUCT: diff --git a/src/jit/regset.cpp b/src/jit/regset.cpp index 4b3134d3bd12..f37c422646c1 100644 --- a/src/jit/regset.cpp +++ b/src/jit/regset.cpp @@ -1902,11 +1902,13 @@ void RegSet::rsSpillRegs(regMaskTP regMask) * for internal tree temps to live in */ +#ifdef LEGACY_BACKEND extern const regNumber raRegTmpOrder[] = {REG_TMP_ORDER}; extern const regNumber rpRegTmpOrder[] = {REG_PREDICT_ORDER}; #if FEATURE_FP_REGALLOC extern const regNumber raRegFltTmpOrder[] = {REG_FLT_TMP_ORDER}; #endif +#endif // LEGACY_BACKEND /***************************************************************************** * diff --git a/src/jit/target.h b/src/jit/target.h index 5838cdda8872..45bc1013a639 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -366,7 +366,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define ALIGN_SIMD_TYPES 1 // whether SIMD type locals are to be aligned #endif // FEATURE_SIMD - #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC #define FEATURE_FIXED_OUT_ARGS 0 // X86 uses push instructions to pass args #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers @@ -510,11 +509,13 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX #define MAX_VAR_ORDER_SIZE 6 + +#ifdef LEGACY_BACKEND #define REG_TMP_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI - #define RBM_TMP_ORDER RBM_EAX,RBM_EDX,RBM_ECX,RBM_EBX,RBM_ESI,RBM_EDI #define REG_TMP_ORDER_COUNT 6 #define REG_PREDICT_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI +#endif // LEGACY_BACKEND // The order here is fixed: it must agree with an order assumed in eetwain... #define REG_CALLEE_SAVED_ORDER REG_EDI,REG_ESI,REG_EBX,REG_EBP @@ -743,7 +744,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 1 // Whether SIMD registers are partially saved at calls #endif // !UNIX_AMD64_ABI #endif - #define FEATURE_WRITE_BARRIER 1 // Generate the WriteBarrier calls for GC (currently not the x86-style register-customized barriers) #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers @@ -770,11 +770,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define MAX_RET_REG_COUNT 1 // Maximum registers used to return a value. #endif // !UNIX_AMD64_ABI -#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS #define NOGC_WRITE_BARRIERS 0 // We DO-NOT have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers -#else - #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers). -#endif #define USER_ARGS_COME_LAST 1 #define EMIT_TRACK_STACK_DEPTH 1 #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target @@ -882,17 +878,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_VAR_ORDER_FLT REG_XMM0,REG_XMM1,REG_XMM2,REG_XMM3,REG_XMM4,REG_XMM5,REG_XMM6,REG_XMM7,REG_XMM8,REG_XMM9,REG_XMM10,REG_XMM11,REG_XMM12,REG_XMM13,REG_XMM14,REG_XMM15 #ifdef UNIX_AMD64_ABI - #define REG_TMP_ORDER REG_EAX,REG_EDI,REG_ESI,REG_EDX,REG_ECX,REG_EBX,REG_ETW_FRAMED_EBP_LIST \ - REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13 -#else // !UNIX_AMD64_ABI - #define MAX_VAR_ORDER_SIZE (14 + REG_ETW_FRAMED_EBP_COUNT) - #define REG_TMP_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI,REG_ETW_FRAMED_EBP_LIST \ - REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13 -#endif // !UNIX_AMD64_ABI - -#ifdef UNIX_AMD64_ABI - #define REG_PREDICT_ORDER REG_EAX,REG_EDI,REG_ESI,REG_EDX,REG_ECX,REG_EBX,REG_ETW_FRAMED_EBP_LIST \ - REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13 #define CNT_CALLEE_SAVED (5 + REG_ETW_FRAMED_EBP_COUNT) #define CNT_CALLEE_TRASH (9) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED) @@ -903,9 +888,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_CALLEE_SAVED_ORDER REG_EBX,REG_ETW_FRAMED_EBP_LIST REG_R12,REG_R13,REG_R14,REG_R15 #define RBM_CALLEE_SAVED_ORDER RBM_EBX,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15 #else // !UNIX_AMD64_ABI - #define REG_TMP_ORDER_COUNT (14 + REG_ETW_FRAMED_EBP_COUNT) - #define REG_PREDICT_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI,REG_ETW_FRAMED_EBP_LIST \ - REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13 #define CNT_CALLEE_SAVED (7 + REG_ETW_FRAMED_EBP_COUNT) #define CNT_CALLEE_TRASH (7) #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED) @@ -969,11 +951,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_JUMP_THUNK_PARAM REG_EAX #define RBM_JUMP_THUNK_PARAM RBM_EAX -#if NOGC_WRITE_BARRIERS - #define REG_WRITE_BARRIER REG_EDX - #define RBM_WRITE_BARRIER RBM_EDX -#endif - // Register to be used for emitting helper calls whose call target is an indir of an // absolute memory address in case of Rel32 overflow i.e. a data address could not be // encoded as PC-relative 32-bit offset. @@ -991,12 +968,10 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits // GenericPInvokeCalliHelper VASigCookie Parameter #define REG_PINVOKE_COOKIE_PARAM REG_R11 #define RBM_PINVOKE_COOKIE_PARAM RBM_R11 - #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R11 // GenericPInvokeCalliHelper unmanaged target Parameter #define REG_PINVOKE_TARGET_PARAM REG_R10 #define RBM_PINVOKE_TARGET_PARAM RBM_R10 - #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R10 // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) #define REG_SECRET_STUB_PARAM REG_R10 @@ -1188,7 +1163,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define CPBLK_UNROLL_LIMIT 32 // Upper bound to let the code generator to loop unroll CpBlk. #define INITBLK_UNROLL_LIMIT 32 // Upper bound to let the code generator to loop unroll InitBlk. - #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers @@ -1204,11 +1178,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA) #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value. -#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS #define NOGC_WRITE_BARRIERS 0 // We DO-NOT have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers -#else - #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers). -#endif #define USER_ARGS_COME_LAST 1 #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really // need to track stack depth, but this is currently necessary to get GC information reported at call sites. @@ -1247,8 +1217,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_DEFAULT_HELPER_CALL_TARGET REG_R12 #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_R12 - #define RBM_CALLEE_TRASH_NOGC (RBM_R2|RBM_R3|RBM_LR|RBM_DEFAULT_HELPER_CALL_TARGET) - #define REG_FASTTAILCALL_TARGET REG_R12 // Target register for fast tail call #define RBM_FASTTAILCALL_TARGET RBM_R12 @@ -1268,6 +1236,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits REG_F24, REG_F25, REG_F26, REG_F27, \ REG_F28, REG_F29, REG_F30, REG_F31, +#ifdef LEGACY_BACKEND #define MAX_VAR_ORDER_SIZE 32 #define REG_TMP_ORDER REG_R3,REG_R2,REG_R1,REG_R0, REG_R4,REG_R5,REG_R6,REG_R7,\ @@ -1287,6 +1256,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_PREDICT_ORDER REG_LR,REG_R12,REG_R3,REG_R2,REG_R1,REG_R0, \ REG_R7,REG_R6,REG_R5,REG_R4,REG_R8,REG_R9,REG_R10 +#endif // LEGACY_BACKEND #define RBM_LOW_REGS (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7) #define RBM_HIGH_REGS (RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_SP|RBM_LR|RBM_PC) @@ -1371,27 +1341,60 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_JUMP_THUNK_PARAM REG_R12 #define RBM_JUMP_THUNK_PARAM RBM_R12 -#if NOGC_WRITE_BARRIERS - #define REG_WRITE_BARRIER REG_R1 - #define RBM_WRITE_BARRIER RBM_R1 -#endif + // ARM write barrier ABI (see vm\arm\asmhelpers.asm, vm\arm\asmhelpers.S): + // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): + // On entry: + // r0: the destination address (LHS of the assignment) + // r1: the object reference (RHS of the assignment) + // On exit: + // r0: trashed + // r3: trashed + // CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): + // On entry: + // r0: the destination address (object reference written here) + // r1: the source address (points to object reference to write) + // On exit: + // r0: incremented by 4 + // r1: incremented by 4 + // r2: trashed + // r3: trashed + + #define REG_WRITE_BARRIER_DST_BYREF REG_ARG_0 + #define RBM_WRITE_BARRIER_DST_BYREF RBM_ARG_0 - //In the ARM case, registers of write barrier use the normal argument registers. #define REG_WRITE_BARRIER_SRC_BYREF REG_ARG_1 #define RBM_WRITE_BARRIER_SRC_BYREF RBM_ARG_1 - #define REG_WRITE_BARRIER_DST_BYREF REG_ARG_0 - #define RBM_WRITE_BARRIER_DST_BYREF RBM_ARG_0 + #define RBM_CALLEE_TRASH_NOGC (RBM_R2|RBM_R3|RBM_LR|RBM_DEFAULT_HELPER_CALL_TARGET) + + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_R0|RBM_R3|RBM_LR|RBM_DEFAULT_HELPER_CALL_TARGET) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER RBM_CALLEE_TRASH_WRITEBARRIER + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF (RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that r0 and r1 are still valid byref pointers after this helper call, despite their value being changed. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF RBM_CALLEE_TRASH_NOGC // GenericPInvokeCalliHelper VASigCookie Parameter #define REG_PINVOKE_COOKIE_PARAM REG_R4 #define RBM_PINVOKE_COOKIE_PARAM RBM_R4 + +#ifdef LEGACY_BACKEND #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R4 +#endif // LEGACY_BACKEND // GenericPInvokeCalliHelper unmanaged target Parameter #define REG_PINVOKE_TARGET_PARAM REG_R12 #define RBM_PINVOKE_TARGET_PARAM RBM_R12 + +#ifdef LEGACY_BACKEND #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R12 +#endif // LEGACY_BACKEND // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) #define REG_SECRET_STUB_PARAM REG_R12 @@ -1546,7 +1549,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 1 // Whether SIMD registers are partially saved at calls #endif // FEATURE_SIMD - #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers #define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers @@ -1562,11 +1564,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA) #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value. -#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers -#else - #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers). -#endif #define USER_ARGS_COME_LAST 1 #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really // need to track stack depth, but this is currently necessary to get GC information reported at call sites. @@ -1608,9 +1606,6 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_DEFAULT_HELPER_CALL_TARGET REG_R12 #define RBM_DEFAULT_HELPER_CALL_TARGET RBM_R12 - // REVIEW: why does arm64 RBM_CALLEE_TRASH_NOGC include IP1? The JIT_ByRefWriteBarrier only trashes r12 and r15. - #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R15|RBM_IP1|RBM_DEFAULT_HELPER_CALL_TARGET) - #define REG_FASTTAILCALL_TARGET REG_IP0 // Target register for fast tail call #define RBM_FASTTAILCALL_TARGET RBM_IP0 @@ -1679,26 +1674,65 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits #define REG_JUMP_THUNK_PARAM REG_R12 #define RBM_JUMP_THUNK_PARAM RBM_R12 -#if NOGC_WRITE_BARRIERS - #define REG_WRITE_BARRIER_SRC_BYREF REG_R13 - #define RBM_WRITE_BARRIER_SRC_BYREF RBM_R13 + // ARM64 write barrier ABI (see vm\arm64\asmhelpers.asm, vm\arm64\asmhelpers.S): + // CORINFO_HELP_ASSIGN_REF (JIT_WriteBarrier), CORINFO_HELP_CHECKED_ASSIGN_REF (JIT_CheckedWriteBarrier): + // On entry: + // x14: the destination address (LHS of the assignment) + // x15: the object reference (RHS of the assignment) + // On exit: + // x12: trashed + // x14: incremented by 8 + // x15: trashed + // x17: trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP (currently non-Windows) + // CORINFO_HELP_ASSIGN_BYREF (JIT_ByRefWriteBarrier): + // On entry: + // x13: the source address (points to object reference to write) + // x14: the destination address (object reference written here) + // On exit: + // x12: trashed + // x13: incremented by 8 + // x14: incremented by 8 + // x15: trashed + // x17: trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP (currently non-Windows) + // + // Note that while x17 (ip1) is currently only trashed under FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP, + // currently only set for non-Windows, it is expected to be set in the future for Windows, and for R2R. + // So simply always consider it trashed, to avoid later breaking changes. + + #define REG_WRITE_BARRIER_DST REG_R14 + #define RBM_WRITE_BARRIER_DST RBM_R14 + + #define REG_WRITE_BARRIER_SRC REG_R15 + #define RBM_WRITE_BARRIER_SRC RBM_R15 #define REG_WRITE_BARRIER_DST_BYREF REG_R14 #define RBM_WRITE_BARRIER_DST_BYREF RBM_R14 - #define REG_WRITE_BARRIER REG_R15 - #define RBM_WRITE_BARRIER RBM_R15 -#endif + #define REG_WRITE_BARRIER_SRC_BYREF REG_R13 + #define RBM_WRITE_BARRIER_SRC_BYREF RBM_R13 + + #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R15|RBM_IP1|RBM_DEFAULT_HELPER_CALL_TARGET) + + // Registers killed by CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_TRASH_WRITEBARRIER (RBM_R14|RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_REF and CORINFO_HELP_CHECKED_ASSIGN_REF. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER RBM_CALLEE_TRASH_NOGC + + // Registers killed by CORINFO_HELP_ASSIGN_BYREF. + #define RBM_CALLEE_TRASH_WRITEBARRIER_BYREF (RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF | RBM_CALLEE_TRASH_NOGC) + + // Registers no longer containing GC pointers after CORINFO_HELP_ASSIGN_BYREF. + // Note that x13 and x14 are still valid byref pointers after this helper call, despite their value being changed. + #define RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF RBM_CALLEE_TRASH_NOGC // GenericPInvokeCalliHelper VASigCookie Parameter #define REG_PINVOKE_COOKIE_PARAM REG_R15 #define RBM_PINVOKE_COOKIE_PARAM RBM_R15 - #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R15 // GenericPInvokeCalliHelper unmanaged target Parameter #define REG_PINVOKE_TARGET_PARAM REG_R14 #define RBM_PINVOKE_TARGET_PARAM RBM_R14 - #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R14 // IL stub's secret MethodDesc parameter (JitFlags::JIT_FLAG_PUBLISH_SECRET_PARAM) #define REG_SECRET_STUB_PARAM REG_R12 @@ -1919,6 +1953,7 @@ class Target }; static const enum ArgOrder g_tgtArgOrder; +#ifdef LEGACY_BACKEND #if NOGC_WRITE_BARRIERS static regMaskTP exclude_WriteBarrierReg(regMaskTP mask) { @@ -1929,6 +1964,7 @@ class Target return RBM_ALLINT & ~RBM_WRITE_BARRIER; } #endif // NOGC_WRITE_BARRIERS +#endif // LEGACY_BACKEND }; #if defined(DEBUG) || defined(LATE_DISASM) diff --git a/src/vm/arm/asmhelpers.asm b/src/vm/arm/asmhelpers.asm index b2430418d23b..60def084e748 100644 --- a/src/vm/arm/asmhelpers.asm +++ b/src/vm/arm/asmhelpers.asm @@ -1652,7 +1652,7 @@ DoWrite ; over the lifetime of the CLR. Specifically ARM has real problems reading the values of external globals (we ; need two memory indirections to do this) so we'd like to be able to directly set the current values of ; various GC globals (e.g. g_lowest_address and g_card_table) into the barrier code itself and then reset them -; every time they change (the GC already calls the VM to inform it of these changes). The handle this without +; every time they change (the GC already calls the VM to inform it of these changes). To handle this without ; creating too much fragility such as hardcoding instruction offsets in the VM update code, we wrap write ; barrier creation and GC globals access in a set of macros that create a table of descriptors describing each ; offset that must be patched. @@ -1916,10 +1916,11 @@ pShadow SETS "r7" ; is more important). ; ; Input: - ; $ptrReg : register containing the location to be updated - ; $valReg : register containing the value (an objref) to be written to the location above - ; $mp : boolean indicating whether the code will run on an MP system - ; $tmpReg : additional register that can be trashed (can alias $ptrReg or $valReg if needed) + ; $ptrReg : register containing the location to be updated + ; $valReg : register containing the value (an objref) to be written to the location above + ; $mp : boolean indicating whether the code will run on an MP system + ; $postGrow : boolean: {true} for post-grow version, {false} otherwise + ; $tmpReg : additional register that can be trashed (can alias $ptrReg or $valReg if needed) ; ; Output: ; $tmpReg : trashed (defaults to $ptrReg) @@ -2005,7 +2006,7 @@ tempReg SETS "$tmpReg" ; ; Finally define the write barrier functions themselves. Currently we don't provide variations that use ; different input registers. If the JIT wants this at a later stage in order to improve code quality it would -; be a relatively simply change to implement via an additional macro parameter to WRITE_BARRIER_ENTRY. +; be a relatively simple change to implement via an additional macro parameter to WRITE_BARRIER_ENTRY. ; ; The calling convention for the first batch of write barriers is: ;