From 3d1b3ad278cb7273ff64d0568465ea86add1506a Mon Sep 17 00:00:00 2001 From: Marius Pirvu Date: Thu, 4 Jan 2024 11:39:30 -0500 Subject: [PATCH] GRA changes to reduce JIT overhead at warm opt levels The commit introduces a new option -Xjit:graFreqThresholdAtWarm= that can be used to control the JIT overhead for compilations performed at warm optimization level or below. The default value of the threshold is 500. A larger value leads to lower JIT overhead but also to potentially lower quality of the generated code. For hot compilations or in situations where we think we can afford more JIT overhead, the threshold is automatically set to 0. Signed-off-by: Marius Pirvu --- compiler/control/OMROptions.cpp | 3 + compiler/control/OMROptions.hpp | 3 + .../optimizer/GlobalRegisterAllocator.cpp | 917 +++--------------- .../optimizer/GlobalRegisterAllocator.hpp | 7 +- compiler/optimizer/OMRRegisterCandidate.cpp | 55 +- 5 files changed, 151 insertions(+), 834 deletions(-) diff --git a/compiler/control/OMROptions.cpp b/compiler/control/OMROptions.cpp index afd726a9dac..55d52b82a10 100644 --- a/compiler/control/OMROptions.cpp +++ b/compiler/control/OMROptions.cpp @@ -829,6 +829,8 @@ TR::OptionTable OMR::Options::_jitOptions[] = { {"GCRresetCount=", "R\tthe value to which the counter is reset to after being tripped by guarded counting recompilations (positive value)", TR::Options::setCount, offsetof(OMR::Options,_GCRResetCount), 0, "F%d"}, {"generateCompleteInlineRanges", "O\tgenerate meta data ranges for each change in inliner depth", SET_OPTION_BIT(TR_GenerateCompleteInlineRanges), "F"}, + {"graFreqThresholdAtWarm=", "O\tgra threshold for block frequency for opt level less of equal to warm", + TR::Options::set32BitNumeric, offsetof(OMR::Options, _graFreqThresholdAtWarm), 500, "F%d"}, {"help", " \tdisplay this help information", TR::Options::helpOption, 0, 0, "F", NOT_IN_SUBSET}, {"help=", " {regex}\tdisplay help for options whose names match {regex}", TR::Options::helpOption, 
1, 0, "F", NOT_IN_SUBSET}, {"highCodeCacheOccupancyBCount=", "R\tthe initial invocation count used during high code cache occupancy for methods with loops", @@ -2678,6 +2680,7 @@ OMR::Options::jitPreProcess() _alwaysWorthInliningThreshold = 15; _maxLimitedGRACandidates = TR_MAX_LIMITED_GRA_CANDIDATES; _maxLimitedGRARegs = TR_MAX_LIMITED_GRA_REGS; + _graFreqThresholdAtWarm = 500; _counterBucketGranularity = 2; _minCounterFidelity = INT_MIN; _lastIpaOptTransformationIndex = INT_MAX; diff --git a/compiler/control/OMROptions.hpp b/compiler/control/OMROptions.hpp index 06a5ca50987..530a55979f8 100644 --- a/compiler/control/OMROptions.hpp +++ b/compiler/control/OMROptions.hpp @@ -1468,6 +1468,7 @@ class OMR_EXTENSIBLE Options _insertGCRTrees = false; _maxLimitedGRACandidates = 0; _maxLimitedGRARegs = 0; + _graFreqThresholdAtWarm = 0; _enableGPU = 0; _isAOTCompile = false; _jProfilingMethodRecompThreshold = 0; @@ -1822,6 +1823,7 @@ class OMR_EXTENSIBLE Options int32_t getAlwaysWorthInliningThreshold() const { return _alwaysWorthInliningThreshold; } int32_t getMaxLimitedGRACandidates() { return _maxLimitedGRACandidates; } int32_t getMaxLimitedGRARegs() { return _maxLimitedGRARegs; } + int32_t getGRAFreqThresholdAtWarm() { return _graFreqThresholdAtWarm; } int32_t getNumLimitedGRARegsWithheld(); int32_t getProfilingCompNodecountThreshold() { return _profilingCompNodecountThreshold; } @@ -2481,6 +2483,7 @@ class OMR_EXTENSIBLE Options int32_t _maxLimitedGRACandidates; int32_t _maxLimitedGRARegs; + int32_t _graFreqThresholdAtWarm; int32_t _enableGPU; diff --git a/compiler/optimizer/GlobalRegisterAllocator.cpp b/compiler/optimizer/GlobalRegisterAllocator.cpp index 27a5d3ec64a..233a940d1db 100644 --- a/compiler/optimizer/GlobalRegisterAllocator.cpp +++ b/compiler/optimizer/GlobalRegisterAllocator.cpp @@ -79,7 +79,7 @@ #define GRA_COMPLEXITY_LIMIT 1000000000 -static bool isHot(TR::Compilation *comp) { return comp->getMethodHotness() >= hot; } +static bool isHot(TR::Compilation 
*comp) { return comp->getMethodHotness() >= hot || comp->getOption(TR_NotCompileTimeSensitive); } #define HAVE_DIFFERENT_MSB_TO_LSB_OFFSETS(r1,r2) \ ((((r1)->getHostByteOffset() + (r1)->getSize()) - ((r2)->getHostByteOffset() + (r2)->getSize())) != 0) @@ -310,7 +310,6 @@ TR_GlobalRegisterAllocator::perform() if (comp()->isGPUCompilation()) return 1; - walkTreesAndCollectSymbolDataTypes(); comp()->getOptimizer()->setResetExitsGRA(0); @@ -355,13 +354,9 @@ TR_GlobalRegisterAllocator::perform() for (a = locals.getFirst(); a != NULL; a = locals.getNext()) ++numLocals; - if (comp()->getOption(TR_EnableAggressiveLiveness)) - { - TR::ParameterSymbol *p; - ListIterator<TR::ParameterSymbol> parms(&comp()->getMethodSymbol()->getParameterList()); - for (p = parms.getFirst(); p != NULL; p = parms.getNext()) - ++numLocals; - } + ListIterator<TR::ParameterSymbol> parms(&comp()->getMethodSymbol()->getParameterList()); + for (TR::ParameterSymbol *p = parms.getFirst(); p != NULL; p = parms.getNext()) + ++numLocals; const uint64_t MAX_BITVECTOR_MEMORY_USAGE = 1000000000; uint64_t bitvectorMemoryUsage = numLocals * comp()->getFlowGraph()->getNextNodeNumber(); @@ -371,8 +366,7 @@ TR_GlobalRegisterAllocator::perform() { // Perform liveness analysis // - TR_Liveness liveLocals(comp(), optimizer(), comp()->getFlowGraph()->getStructure(), - false, NULL, false, comp()->getOption(TR_EnableAggressiveLiveness)); + TR_Liveness liveLocals(comp(), optimizer(), comp()->getFlowGraph()->getStructure(), false, NULL, false, true); liveLocals.perform(comp()->getFlowGraph()->getStructure()); @@ -422,10 +416,12 @@ TR_GlobalRegisterAllocator::perform() } candidates->getReferencedAutoSymRefs(comp()->trMemory()->currentStackRegion()); - if (!comp()->mayHaveLoops() || cg()->considerAllAutosAsTacticalGlobalRegisterCandidates()) + + static const char *skipit = feGetEnv("TR_SkipOfferAllGRA"); + if (NULL == skipit) + { offerAllAutosAndRegisterParmAsCandidates(cfgBlocks, numberOfBlocks); - else - offerAllFPAutosAndParmsAsCandidates(cfgBlocks, 
numberOfBlocks); + } _registerCandidates = new (trStackMemory()) SymRefCandidateMap((SymRefCandidateMapComparator()), SymRefCandidateMapAllocator(trMemory()->currentStackRegion())); @@ -436,10 +432,6 @@ TR_GlobalRegisterAllocator::perform() (*_registerCandidates)[rc->getSymbolReference()->getReferenceNumber()] = rc; } - findIfThenRegisterCandidates(); - - findLoopAutoRegisterCandidates(); - if (comp()->getOptions()->realTimeGC() && comp()->compilationShouldBeInterrupted(GRA_AFTER_FIND_LOOP_AUTO_CONTEXT)) { @@ -2650,280 +2642,6 @@ TR_GlobalRegisterAllocator::getGlobalRegister(TR::Symbol * symbol, TR_ArrayphaseTimer()); - - TR_ScratchList registerCandidates(trMemory()); - TR::CFG * cfg = comp()->getFlowGraph(); - - TR::ResolvedMethodSymbol *methodSymbol = comp()->getJittedMethodSymbol(); - ListIterator paramIterator(&(methodSymbol->getParameterList())); - ListIterator autoIterator(&(methodSymbol->getAutomaticList())); - TR::ParameterSymbol *paramCursor = paramIterator.getFirst(); - TR::AutomaticSymbol *autoCursor = autoIterator.getFirst(); - - // This first part isn't really looking for If-Then-Else candidates. - // It is visiting all parameters and locals and ensuring that the candidate has a BlockInfo entry if it - // is live in any block. 
- if(!debug("oldIfThen")) - { - LexicalTimer t("newFindIfThen", comp()->phaseTimer()); - - // First create a bit vector of auto and parm symbols - // Guess at size of bit vector to use by getting first block's LiveLocals bitvector and check its size - TR_BitVector *guess = toBlock(cfg->getFirstNode())->getLiveLocals(); - int32_t guessSize = 1024; - if(guess && guess->numChunks()*BITS_IN_CHUNK > guessSize) - guessSize = guess->numChunks()*BITS_IN_CHUNK; - - TR_BitVector autoAndParmLiveLocalIndex(guessSize, trMemory(), stackAlloc, growable); - TR_Array registerCandidateByIndex(trMemory(), guessSize, false, stackAlloc); - autoAndParmLiveLocalIndex.empty(); - int32_t i; - while (paramCursor != NULL) - { - if (paramCursor->isReferencedParameter()) - { - TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->find(paramCursor); - if (!rc) - { - paramCursor = paramIterator.getNext(); - continue; - } - i = paramCursor->getLiveLocalIndex(); - autoAndParmLiveLocalIndex.set(i); - registerCandidateByIndex[i] = rc; - } - paramCursor = paramIterator.getNext(); - } - while (autoCursor != NULL) - { - TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->find(autoCursor); - if (!rc) - { - autoCursor = autoIterator.getNext(); - continue; - } - i = autoCursor->getLiveLocalIndex(); - autoAndParmLiveLocalIndex.set(i); - registerCandidateByIndex[i] = rc; - autoCursor = autoIterator.getNext(); - } - - // Now visit all blocks and intersect each blocks LiveLocals with autoAndParmLiveLocalIndex. 
- // For each intersected bit ensure BlockInfo exists for the candidate and initialize it - // to zero NumberOfLoadsAndStores if it does not exist - guessSize = autoAndParmLiveLocalIndex.numChunks()*BITS_IN_CHUNK; - TR_BitVector intersection(guessSize, trMemory(), stackAlloc, growable); - for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) - { - TR_BitVector * liveLocals = toBlock(block)->getLiveLocals(); - if (cg()->getLiveLocals() && liveLocals) - { - if (block != comp()->getStartBlock() && - block != cfg->getStart() && - block != cfg->getEnd()) - { - intersection = autoAndParmLiveLocalIndex; - intersection &= *liveLocals; - TR_BitVectorIterator bvi(intersection); - while (bvi.hasMoreElements()) - { - int32_t autoOrParm = bvi.getNextElement(); - TR::RegisterCandidate *rc=registerCandidateByIndex[autoOrParm]; - if(!rc->getBlocks().find(block->getNumber())) - rc->getBlocks().setNumberOfLoadsAndStores(toBlock(block)->getNumber(), 0); - } - } - } - } - } - else - { - LexicalTimer t("oldFindIfThen", comp()->phaseTimer()); - while (paramCursor != NULL) - { - if (paramCursor->isReferencedParameter()) - { - TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->find(paramCursor); - if (!rc) - { - paramCursor = paramIterator.getNext(); - continue; - } - - for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) - { - TR_BitVector * liveLocals = toBlock(block)->getLiveLocals(); - if (cg()->getLiveLocals() && - liveLocals && !rc->getBlocks().find(block->getNumber())) - { - if (block != comp()->getStartBlock() && - block != cfg->getStart() && - block != cfg->getEnd() && - liveLocals->get(paramCursor->getLiveLocalIndex())) - { - rc->getBlocks().setNumberOfLoadsAndStores(toBlock(block)->getNumber(), 0); - } - } - } - } - - paramCursor = paramIterator.getNext(); - } - - while (autoCursor != NULL) - { - //if (paramCursor->isReferencedParameter()) - { - TR::RegisterCandidate *rc = 
comp()->getGlobalRegisterCandidates()->find(autoCursor); - if (!rc) - { - autoCursor = autoIterator.getNext(); - continue; - } - - for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) - { - TR_BitVector * liveLocals = toBlock(block)->getLiveLocals(); - if (cg()->getLiveLocals() && - liveLocals && !rc->getBlocks().find(block->getNumber())) - { - if (block != comp()->getStartBlock() && - block != cfg->getStart() && - block != cfg->getEnd() && - liveLocals->get(autoCursor->getLiveLocalIndex())) - { - rc->getBlocks().setNumberOfLoadsAndStores(toBlock(block)->getNumber(), 0); - } - } - } - } - - autoCursor = autoIterator.getNext(); - } - } - - for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) - { - TR::CFGEdgeList& edges = block->getSuccessors(); - TR::Block *currBlock = toBlock(block); - if ((edges.size() == 2) && currBlock->getExit()) - { - TR::Block * block1 = toBlock(edges.front()->getTo()); - TR::Block * block2 = toBlock((*(++edges.begin()))->getTo()); - - TR::Block *mergeBlock1 = NULL, *mergeBlock2 = NULL; - if (block1->getSuccessors().size() == 1) - mergeBlock1 = toBlock(block1->getSuccessors().front()->getTo()); - - if (mergeBlock1) - { - if (block2->hasSuccessor(mergeBlock1)) - mergeBlock2 = mergeBlock1; - } - else - { - if (block2->getSuccessors().size() == 1) - mergeBlock2 = toBlock(block2->getSuccessors().front()->getTo()); - - if (mergeBlock2) - { - if (block1->hasSuccessor(mergeBlock2)) - mergeBlock1 = mergeBlock2; - } - } - - - //if (block1->getSuccessors().size() == 1) && block2->getSuccessors().size() == 1)) - { - //TR::Block * mergeBlock1 = toBlock(block1->getSuccessors().front()->getTo()); - //TR::Block * mergeBlock2 = toBlock(block2->getSuccessors().front()->getTo()); - if (mergeBlock1 && - mergeBlock1 == mergeBlock2 && - mergeBlock1 != cfg->getEnd()) - { - for (TR::TreeTop * tt1 = block1->getEntry(); tt1 != block1->getExit(); tt1 = tt1->getNextTreeTop()) - { - TR::Node * storeNode1 = 
tt1->getNode()->getStoreNode(); - if (storeNode1 && storeNode1->getOpCode().isStoreDirect()) - { - TR::RegisterCandidate * rc = 0; - TR::SymbolReference * symRef = tt1->getNode()->getSymbolReference(); - if ((symRef->getSymbol()->isAutoOrParm() ) && - isSymRefAvailable(symRef)) - for (TR::TreeTop * tt2 = block2->getEntry(); tt2 != block2->getExit(); tt2 = tt2->getNextTreeTop()) - { - TR::Node * storeNode2 = tt2->getNode()->getStoreNode(); - if (storeNode2 && storeNode2->getOpCode().isStoreDirect() && storeNode2->getSymbolReference() == symRef) - { - rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); - break; - } - } - if (rc) - { - int32_t weight = 3; - if (mergeBlock1->findFirstReference(symRef->getSymbol(), comp()->incVisitCount())) - { - if (mergeBlock1->getStructureOf()) - optimizer()->getStaticFrequency(mergeBlock1, &weight); - - rc->addBlock(mergeBlock1, weight); - } - if (toBlock(block)->findFirstReference(symRef->getSymbol(), comp()->incVisitCount())) - { - weight = 1; - if (toBlock(block)->getStructureOf()) - optimizer()->getStaticFrequency(toBlock(block), &weight); - - rc->addBlock(block1, weight); - rc->addBlock(block2, weight); - } - } - } - } - } - } - - - TR::Node *virtualGuard = currBlock->getLastRealTreeTop()->getNode(); - if (virtualGuard->isTheVirtualGuardForAGuardedInlinedCall()) - { - TR::Block *branchBlock = virtualGuard->getBranchDestination()->getNode()->getBlock(); - TR::TreeTop *tt1 = currBlock->getEntry(); - for (;tt1 != currBlock->getExit(); tt1 = tt1->getNextTreeTop()) - { - TR::Node * storeNode1 = tt1->getNode()->getStoreNode(); - if (storeNode1 && storeNode1->getOpCode().isStoreDirect()) - { - TR::RegisterCandidate * rc = 0; - TR::SymbolReference * symRef = tt1->getNode()->getSymbolReference(); - if ((symRef->getSymbol()->isAutoOrParm() ) && - isSymRefAvailable(symRef)) - { - rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); - } - - if (rc) - { - int32_t weight = 1; - if 
(branchBlock->findFirstReference(symRef->getSymbol(), comp()->incVisitCount())) - { - if (branchBlock->getStructureOf()) - optimizer()->getStaticFrequency(branchBlock, &weight); - //printf("Adding symRef %d in block_%d\n", symRef->getReferenceNumber(), branchBlock->getNumber()); - rc->addBlock(branchBlock, weight); - } - } - } - } - } - } - } - } void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Block **cfgBlocks, int32_t numberOfNodes, bool onlySelectedCandidates) { @@ -2940,8 +2658,10 @@ void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl TR::Symbol *sym; TR::RegisterCandidates *candidates = comp()->getGlobalRegisterCandidates(); + int32_t freqThreshold = isHot(comp()) ? 0 : comp()->getOptions()->getGRAFreqThresholdAtWarm(); // Interested blocks consist of all blocks except for entry, exit and exception handlers + // TR_BitVector interestedBlocks(numberOfNodes, comp()->trMemory()->currentStackRegion()); TR_BitVector tmp(numberOfNodes, comp()->trMemory()->currentStackRegion()); for (node = cfg->getFirstNode(); node != NULL; node = node->getNext()) @@ -2952,42 +2672,73 @@ void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl interestedBlocks.set(block->getNumber()); } - // - // Offer parameters first - // - for (TR::ParameterSymbol *paramCursor = paramIterator.getFirst(); paramCursor != NULL; paramCursor = paramIterator.getNext()) - { - symRef = methodSymbol->getParmSymRef(paramCursor->getSlot()); + // First create a bit vector of auto and parm symbols + // Guess at size of bit vector to use by getting first block's LiveLocals bitvector and check its size + TR_BitVector *guess = toBlock(cfg->getFirstNode())->getLiveLocals(); + int32_t guessSize = 1024; + if (guess && guess->numChunks()*BITS_IN_CHUNK > guessSize) + guessSize = guess->numChunks()*BITS_IN_CHUNK; - if ((paramCursor->isReferencedParameter() && isTypeAvailable(symRef)) && - !onlySelectedCandidates) + TR::ParameterSymbol 
*paramCursor = paramIterator.getFirst(); + TR_BitVector autoAndParmLiveLocalIndex(guessSize, trMemory(), stackAlloc, growable); + TR_Array<TR::RegisterCandidate *> registerCandidateByIndex(trMemory(), guessSize, false, stackAlloc); + autoAndParmLiveLocalIndex.empty(); + int32_t i; + while (paramCursor != NULL) + { + TR::SymbolReference *symRef = methodSymbol->getParmSymRef(paramCursor->getSlot()); + TR::RegisterCandidate *rc = NULL; + if (paramCursor->isReferencedParameter() && isTypeAvailable(symRef)) { - if (!isSymRefAvailable(symRef)) - continue; - - int32_t symRefNumber = symRef->getReferenceNumber(); - - // Check there is an interested block that references the symref - tmp.empty(); - tmp |= *candidates->getBlocksReferencingSymRef(symRefNumber); - tmp &= interestedBlocks; - if (tmp.isEmpty()) - continue; - - TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); + rc = comp()->getGlobalRegisterCandidates()->find(paramCursor); + if (!rc) + { + // Check there is an interested block that references the symref + tmp.empty(); + tmp |= *comp()->getGlobalRegisterCandidates()->getBlocksReferencingSymRef(symRef->getReferenceNumber()); + tmp &= interestedBlocks; + if (!tmp.isEmpty()) + { + rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); + } + else + { + paramCursor = paramIterator.getNext(); + continue; + } + } - // All interested blocks will be candidates - rc->getBlocks().getCandidateBlocks() |= interestedBlocks; + // All live interested blocks will be candidates + rc->getBlocks().getCandidateBlocks() |= tmp; // Increment the number of loads and stores for all candidate blocks // that also reference the symref TR_BitVectorIterator bvi(tmp); while (bvi.hasMoreElements()) - rc->getBlocks().incNumberOfLoadsAndStores(bvi.getNextElement(), 1); + { + int32_t nextBlockNum = bvi.getNextElement(); + TR::Block *nextBlock = cfgBlocks[nextBlockNum]; + if (isHot(comp()) || (nextBlock->getFrequency() > freqThreshold)) + { + int32_t executionFrequency = 1; 
+ if (nextBlock->getStructureOf()) + optimizer()->getStaticFrequency(nextBlock, &executionFrequency); + rc->getBlocks().incNumberOfLoadsAndStores(nextBlockNum, executionFrequency); + } + } - if (paramCursor->getLinkageRegisterIndex() >= 0) - rc->addAllBlocks(); + static const char *doit = feGetEnv("TR_AddAllBlocksForLinkageRegs"); + if (doit != NULL) + { + if (paramCursor->getLinkageRegisterIndex() >= 0) + rc->addAllBlocks(); + } } + + i = paramCursor->getLiveLocalIndex(); + autoAndParmLiveLocalIndex.set(i); + registerCandidateByIndex[i] = rc; + paramCursor = paramIterator.getNext(); } // @@ -2999,8 +2750,9 @@ void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl if (symRef && isSymRefAvailable(symRef)) { sym = symRef->getSymbol(); - if (sym) + if (sym && sym->isAuto()) { + TR::AutomaticSymbol *autoCursor = sym->getAutoSymbol(); if (candidates->aliasesPreventAllocation(comp(),symRef)) { if (comp()->getOptions()->trace(OMR::tacticalGlobalRegisterAllocator)) @@ -3008,11 +2760,9 @@ void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl continue; } - if ((sym->isAuto() && - methodSymbol->getAutomaticList().find(sym->castToAutoSymbol()) && - !onlySelectedCandidates)) + if (methodSymbol->getAutomaticList().find(sym->castToAutoSymbol()) && + !onlySelectedCandidates) { - int32_t symRefNumber = symRef->getReferenceNumber(); // Check there is an interested block that references the symref @@ -3026,107 +2776,64 @@ void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl if (sym->isMethodMetaData() && rc && rc->initialBlocksWeightComputed()) continue; - // All interested blocks will be candidates - rc->getBlocks().getCandidateBlocks() |= interestedBlocks; + // All live interested blocks will be candidates + rc->getBlocks().getCandidateBlocks() |= tmp; // Increment the number of loads and stores for all candidate blocks // that also reference the symref TR_BitVectorIterator bvi(tmp); while 
(bvi.hasMoreElements()) - rc->getBlocks().incNumberOfLoadsAndStores(bvi.getNextElement(), 1); + { + int32_t nextBlockNum = bvi.getNextElement(); + TR::Block *nextBlock = cfgBlocks[nextBlockNum]; + if (isHot(comp()) || (nextBlock->getFrequency() > freqThreshold)) + { + int32_t executionFrequency = 1; + if (nextBlock->getStructureOf()) + optimizer()->getStaticFrequency(nextBlock, &executionFrequency); + rc->getBlocks().incNumberOfLoadsAndStores(nextBlockNum, executionFrequency); + } + } rc->setInitialBlocksWeightComputed(true); + + i = autoCursor->getLiveLocalIndex(); + autoAndParmLiveLocalIndex.set(i); + registerCandidateByIndex[i] = rc; } } } } - } - - - -void TR_GlobalRegisterAllocator::offerAllFPAutosAndParmsAsCandidates(TR::Block * *cfgBlocks, int32_t numberOfNodes) - { - LexicalTimer t("TR_GlobalRegisterAllocator::offerAllFPAutosAndParmsAsCandidates", comp()->phaseTimer()); - - TR::CFG *cfg = comp()->getFlowGraph(); - TR::CFGNode *node; - TR::Block *block, *startBlock=toBlock(cfg->getStart()), *endBlock=toBlock(cfg->getEnd()); - int32_t symRefCount = comp()->getSymRefCount(); - TR::SymbolReferenceTable *symRefTab = comp()->getSymRefTab(); - TR::SymbolReference *symRef; - TR::Symbol *sym; - TR::ResolvedMethodSymbol *methodSymbol = comp()->getJittedMethodSymbol(); - TR::RegisterCandidates *candidates = comp()->getGlobalRegisterCandidates(); - - // Interested blocks consist of all blocks except for entry, exit and exception handlers - TR_BitVector interestedBlocks(numberOfNodes, comp()->trMemory()->currentStackRegion()); - TR_BitVector tmp(numberOfNodes, comp()->trMemory()->currentStackRegion()); - for (node = cfg->getFirstNode(); node != NULL; node = node->getNext()) - { - block = toBlock(node); - if (block == startBlock || block == endBlock || (!block->getExceptionPredecessors().empty()) || !cfgBlocks[block->getNumber()]) - continue; - interestedBlocks.set(block->getNumber()); - } - // - // Offer all FP autos now - // - for (int32_t symRefNumber = 
symRefTab->getIndexOfFirstSymRef(); symRefNumber < symRefCount; symRefNumber++) + // Now visit all blocks and intersect each blocks LiveLocals with autoAndParmLiveLocalIndex. + // For each intersected bit ensure BlockInfo exists for the candidate and initialize it + // to zero NumberOfLoadsAndStores if it does not exist + guessSize = autoAndParmLiveLocalIndex.numChunks()*BITS_IN_CHUNK; + TR_BitVector intersection(guessSize, trMemory(), stackAlloc, growable); + for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) { - symRef = symRefTab->getSymRef(symRefNumber); - if (symRef) + TR_BitVector * liveLocals = toBlock(block)->getLiveLocals(); + int32_t frequency = toBlock(block)->getFrequency(); + if ((isHot(comp()) || (frequency > freqThreshold)) && (cg()->getLiveLocals() && liveLocals)) { - sym = symRef->getSymbol(); - if (sym) + if (block != cfg->getStart() && block != cfg->getEnd()) { - - if ((sym->getDataType() == TR::Float - || sym->getDataType() == TR::Double - ) && - isTypeAvailable(symRef) && - ((sym->isAuto() && methodSymbol->getAutomaticList().find(sym->castToAutoSymbol())) || - (sym->isParm() && methodSymbol->getParameterList().find(sym->castToParmSymbol()) && sym->isReferencedParameter()))) + intersection = autoAndParmLiveLocalIndex; + intersection &= *liveLocals; + TR_BitVectorIterator bvi(intersection); + while (bvi.hasMoreElements()) { - - int32_t symRefNumber = symRef->getReferenceNumber(); - - // Check there is an interested block that references the symref - tmp.empty(); - tmp |= *candidates->getBlocksReferencingSymRef(symRefNumber); - tmp &= interestedBlocks; - if (tmp.isEmpty()) - continue; - - TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); - - // All interested blocks will be candidates - rc->getBlocks().getCandidateBlocks() |= interestedBlocks; - - // Increment the number of loads and stores for all candidate blocks - // that also reference the symref - TR_BitVectorIterator 
bvi(tmp); - while (bvi.hasMoreElements()) - rc->getBlocks().incNumberOfLoadsAndStores(bvi.getNextElement(), 1); + int32_t autoOrParm = bvi.getNextElement(); + TR::RegisterCandidate *rc=registerCandidateByIndex[autoOrParm]; + if(!rc->getBlocks().find(block->getNumber())) + rc->getBlocks().setNumberOfLoadsAndStores(toBlock(block)->getNumber(), 0); } } } } } -void -TR_GlobalRegisterAllocator::findLoopAutoRegisterCandidates() - { - LexicalTimer t("TR_GlobalRegisterAllocator::findLoopAutoRegisterCandidates", comp()->phaseTimer()); - TR::StackMemoryRegion stackMemoryRegion(*trMemory()); - - TR::CFG * cfg = comp()->getFlowGraph(); - vcount_t visitCount = comp()->incVisitCount(); - TR_Structure *rootStructure = comp()->getFlowGraph()->getStructure(); - SymRefCandidateMap * registerCandidates = new (trStackMemory()) SymRefCandidateMap((SymRefCandidateMapComparator()), SymRefCandidateMapAllocator(trMemory()->currentStackRegion())); - findLoopsAndCorrespondingAutos(NULL, visitCount, *registerCandidates); - } TR_GlobalRegisterAllocator::BlockInfo & TR_GlobalRegisterAllocator::blockInfo(int32_t i) @@ -3136,189 +2843,6 @@ TR_GlobalRegisterAllocator::blockInfo(int32_t i) return _blockInfo[i]; } -void -TR_GlobalRegisterAllocator::findLoopsAndCorrespondingAutos(TR_StructureSubGraphNode *structureNode, vcount_t visitCount, SymRefCandidateMap &registerCandidates) - { - TR_Structure *structure; - if (structureNode) - structure = structureNode->getStructure(); - else - structure = comp()->getFlowGraph()->getStructure(); - - if (structure->asRegion()) - { - TR_RegionStructure *regionStructure = structure->asRegion(); - TR_StructureSubGraphNode *subNode; - TR_Structure *subStruct = NULL; - TR_RegionStructure::Cursor si(*regionStructure); - for (subNode = si.getCurrent(); subNode != NULL; subNode = si.getNext()) - { - subStruct = subNode->getStructure(); - findLoopsAndCorrespondingAutos(subNode, visitCount, registerCandidates); - } - - if (!regionStructure->isAcyclic() && structureNode) - { 
- TR_ScratchList blocksInLoop(trMemory()); - regionStructure->getBlocks(&blocksInLoop); - - TR_BitVector assignedAutosInCurrentLoop(_origSymRefCount, trMemory(), stackAlloc); - - TR_BitVector *symsThatShouldNotBeAssignedInCurrentLoop = NULL; - TR_BitVector *symsThatShouldBeAssignedInCurrentLoop = NULL; - - bool excludeInvariantsEnabled = comp()->cg()->excludeInvariantsFromGRAEnabled() && - (!comp()->getOption(TR_DisableRXusage)); - - if (excludeInvariantsEnabled) - { - symsThatShouldNotBeAssignedInCurrentLoop = new (trStackMemory()) TR_BitVector(_origSymRefCount, trMemory(), stackAlloc); - symsThatShouldNotBeAssignedInCurrentLoop->setAll(_origSymRefCount); - symsThatShouldBeAssignedInCurrentLoop = new (trStackMemory()) TR_BitVector(_origSymRefCount, trMemory(), stackAlloc); - } - - ListIterator blocksIt(&blocksInLoop); - TR::Block *nextBlock; - bool hasCatchBlock = false; - for (nextBlock = blocksIt.getFirst(); nextBlock; nextBlock=blocksIt.getNext()) - { - if (nextBlock->isCatchBlock()) - hasCatchBlock = true; - } - - TR_BitVector *oldCandidatesSignExtendedInThisLoop = NULL; - if (_candidatesSignExtendedInThisLoop) - { - oldCandidatesSignExtendedInThisLoop = new (trStackMemory()) TR_BitVector(_origSymRefCount, trMemory(), stackAlloc); - *oldCandidatesSignExtendedInThisLoop = *_candidatesSignExtendedInThisLoop; - _candidatesSignExtendedInThisLoop->empty(); - } - - for (nextBlock = blocksIt.getFirst(); nextBlock; nextBlock=blocksIt.getNext()) - { - if (nextBlock->getVisitCount() != visitCount) - { - static uint32_t numIter = 0; - if (((++numIter) & 0x3f)==0 && comp()->compilationShouldBeInterrupted(GRA_FIND_LOOPS_AND_CORRESPONDING_AUTOS_BLOCK_CONTEXT)) - { - comp()->failCompilation("interrupted in GRA-findLoopsAndCorrspondingAuto-block"); - } - nextBlock->setVisitCount(visitCount); - int32_t executionFrequency = 1; - if (nextBlock->getStructureOf()) - optimizer()->getStaticFrequency(nextBlock, &executionFrequency); - - TR::TreeTop *currentTree = 
nextBlock->getEntry(); - TR::TreeTop *exitTree = nextBlock->getExit(); - while (currentTree != exitTree) - { - TR::Node *currentNode = currentTree->getNode(); - TR::Node *arrayAccess = NULL; - markAutosUsedIn(currentNode, NULL, NULL, &arrayAccess, nextBlock, &blocksInLoop, visitCount, executionFrequency, registerCandidates, &assignedAutosInCurrentLoop, symsThatShouldNotBeAssignedInCurrentLoop, hasCatchBlock); - currentTree = currentTree->getNextRealTreeTop(); - } - } - } - - if (oldCandidatesSignExtendedInThisLoop) - *_candidatesSignExtendedInThisLoop = *oldCandidatesSignExtendedInThisLoop; - - for (auto succ = structureNode->getSuccessors().begin(); succ != structureNode->getSuccessors().end(); ++succ) - { - TR_Structure *exitStructure = (*succ)->getTo()->asStructureSubGraphNode()->getStructure(); - TR::Block *exitBlock = NULL; - if (exitStructure) - exitBlock = exitStructure->getEntryBlock(); - - // Turning off this code below. Reason : extending the live range till the - // exit blocks may lead to slowdowns if the loop (whose exit we are extending - // the live range to) was actually run for only a few iterations and - // the outer loop was in fact long running. Making the candidate live on entry - // to an exit out of the inner loop may cause it to be loaded up into the register - // on some hot path in the outer loop and this extra load could degrade performance. - // This scenario actually occurred in db shell sort, where the innermost loop is - // String compareTo code which does not run too long, but the outer loops - // are very hot. 
- // - if (0 && exitBlock && - (exitBlock != comp()->getFlowGraph()->getEnd())) - { - TR_BitVectorIterator bvi(assignedAutosInCurrentLoop); - while (bvi.hasMoreElements()) - { - int32_t nextCandidate = bvi.getNextElement(); - //dumpOptDetails(comp(), "For loop %d exit block_%d candidate %d\n", structureNode->getNumber(), exitBlock->getNumber(), nextCandidate); - TR::RegisterCandidate *rc = registerCandidates[nextCandidate]; - rc->addBlock(exitBlock, 0); - rc->addLoopExitBlock(exitBlock); - } - } - } - - if (symsThatShouldNotBeAssignedInCurrentLoop) - { - *symsThatShouldNotBeAssignedInCurrentLoop &= assignedAutosInCurrentLoop; - - TR_BitVectorIterator bvi(*symsThatShouldNotBeAssignedInCurrentLoop); - while (bvi.hasMoreElements()) - { - int32_t nextCandidate = bvi.getNextElement(); - if (!symsThatShouldBeAssignedInCurrentLoop->get(nextCandidate)) - { - *_temp2 = *symsThatShouldBeAssignedInCurrentLoop; - TR::RegisterCandidate *rc = registerCandidates[nextCandidate]; - TR::SymbolReference *symRef = rc->getSymbolReference(); - ListIterator pairs(&_pairedSymbols); - bool doNotRemoveThisCandidate = false; - TR_PairedSymbols *p; - for (p = pairs.getFirst(); p; p = pairs.getNext()) - { - TR::SymbolReference *otherSymRef = NULL; - if (p->_symRef1 == symRef) - otherSymRef = p->_symRef2; - else if (p->_symRef2 == symRef) - otherSymRef = p->_symRef1; - - if (otherSymRef && symsThatShouldNotBeAssignedInCurrentLoop->get(otherSymRef->getReferenceNumber())) - { - TR::RegisterCandidate *otherCandidate = registerCandidates[otherSymRef->getReferenceNumber()]; - if (otherCandidate->countNumberOfLoadsAndStoresInBlocks(&blocksInLoop) < rc->countNumberOfLoadsAndStoresInBlocks(&blocksInLoop)) - symsThatShouldBeAssignedInCurrentLoop->set(otherSymRef->getReferenceNumber()); - else - doNotRemoveThisCandidate = true; - } - } - - if (!doNotRemoveThisCandidate) - { - //printf("Removing candidate in %s\n", _compilation->getCurrentMethod()->signature()); - blocksIt.reset(); - for (nextBlock = 
blocksIt.getCurrent(); nextBlock; nextBlock=blocksIt.getNext()) - rc->removeBlock(nextBlock); - - for (auto succ = structureNode->getSuccessors().begin(); succ != structureNode->getSuccessors().end(); ++succ) - { - TR_Structure *exitStructure = (*succ)->getTo()->asStructureSubGraphNode()->getStructure(); - TR::Block *exitBlock = NULL; - if (exitStructure) - exitBlock = exitStructure->getEntryBlock(); - if (exitBlock && - (exitBlock != comp()->getFlowGraph()->getEnd())) - { - rc->removeBlock(exitBlock); - rc->removeLoopExitBlock(exitBlock); - } - } - } - else - *symsThatShouldBeAssignedInCurrentLoop = *_temp2; - } - } - } - } - } - } - bool TR_GlobalRegisterAllocator::isDependentStore(TR::Node *node, const TR_UseDefInfo::BitVector &defs, TR::SymbolReference *symRef, bool *seenLoad) { @@ -3353,231 +2877,6 @@ bool TR_GlobalRegisterAllocator::isDependentStore(TR::Node *node, const TR_UseDe } - -void -TR_GlobalRegisterAllocator::markAutosUsedIn( - TR::Node *node, - TR::Node *parent, - TR::Node *grandParent, - TR::Node **currentArrayAccess, - TR::Block *block, - List *blocksInLoop, - vcount_t visitCount, - int32_t executionFrequency, - SymRefCandidateMap &registerCandidates, - TR_BitVector *assignedAutosInCurrentLoop, - TR_BitVector *symsThatShouldNotBeAssignedInCurrentLoop, - bool hasCatchBlock) - { - bool enableSignExtGRA = false; // enable for other platforms later - - static char *doit = feGetEnv("TR_SIGNEXTGRA"); - if (NULL != doit) - enableSignExtGRA = true; - - if (comp()->target().cpu.isZ()) - { - enableSignExtGRA = true; - static char *doit2 = feGetEnv("TR_NSIGNEXTGRA"); - if (NULL != doit2) - enableSignExtGRA = false; - } - - TR::Node *origNode = NULL; - TR::Node *prevArrayAccess = NULL; - - if (node->getVisitCount() == visitCount) - return; - - node->setVisitCount(visitCount); - - if (node->getOpCode().isLoadVarDirect() && node->getSymbolReference()->getSymbol()->isAuto()) - { - TR_UseDefInfo *info = optimizer()->getUseDefInfo(); - if (comp()->target().is64Bit() 
&& info && - (parent->getOpCodeValue() == TR::i2l) && node->isNonNegative() && enableSignExtGRA) - { - node->setSkipSignExtension(true); - _candidatesNeedingSignExtension->set(node->getSymbolReference()->getReferenceNumber()); - - TR_BlockStructure *blockStructure = block->getStructureOf(); - //printf("Skip sign extension at node %p in %s\n", node, comp->getCurrentMethod()->signature()); - int32_t useIndex = node->getUseDefIndex(); - TR_UseDefInfo::BitVector defs(comp()->allocator()); - if (info->getUseDef(defs, useIndex)) - { - TR_UseDefInfo::BitVector::Cursor cursor(defs); - for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne()) - { - int32_t defIndex = info->getFirstDefIndex() + (int32_t) cursor; - if (defIndex < info->getFirstRealDefIndex()) // def is unseen--can't mark this node as skippable - { - node->setSkipSignExtension(false); - //printf("Parm def, so cannot skip sign extension at node %p in %s\n", - // node, comp->getCurrentMethod()->signature()); - break; - } - TR::Node *defNode = info->getNode(defIndex); - if (defNode->getOpCode().isStore()) - { - TR::Symbol *sym = defNode->getSymbolReference()->getSymbol(); - // TR_ASSERT((sym == node->getSymbolReference()->getSymbol()), "Symbols must match between a use and a def\n"); - // Comment out the above assume because: - // - The assume came from JAVA where there could not be aliasing with autos - // - If there is an alias causing a different sym then we will not have a candidate anyways so it does not matter - // - avoid extra computation if symbols do not match by adding extra condition below. 
- if (sym == node->getSymbolReference()->getSymbol() && - (sym->isAuto() )) - { - bool seenLoad = false; - bool dependentStore = isDependentStore(defNode, defs, node->getSymbolReference(), &seenLoad); - if (dependentStore && - defNode->getFirstChild()->isNonNegative() && - seenLoad) - { - //printf("Skip sign extension at def node %p in %s\n", defNode, comp->getCurrentMethod()->signature()); - TR::TreeTop *defTree = info->getTreeTop(defIndex); - TR::Block *defBlock = defTree->getEnclosingBlock(); - TR_BlockStructure *defBlockStructure = defBlock->getStructureOf(); - if (comp()->getFlowGraph()->getStructure() && - blockStructure && defBlockStructure && - (blockStructure->getContainingLoop() == defBlockStructure->getContainingLoop())) - continue; - } - - if (!defNode->getFirstChild()->isNonNegative()) - { - node->setSkipSignExtension(false); - break; - } - else - defNode->setNeedsSignExtension(true); - } - } - } - } - } - } - - if (node->getOpCode().isLoadVarDirect() || node->getOpCode().isStoreDirect()) - { - TR::CFG *cfg = comp()->getFlowGraph(); - - TR::SymbolReference *symRef = node->getSymbolReference(); - if (symRef->getSymbol()->isAutoOrParm() && isSymRefAvailable(symRef, blocksInLoop)) - { - TR::RegisterCandidate *rc = registerCandidates[symRef->getReferenceNumber()]; - if (!rc) - { - registerCandidates[symRef->getReferenceNumber()] = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); - rc = registerCandidates[symRef->getReferenceNumber()]; - } - - if (!assignedAutosInCurrentLoop->get(symRef->getReferenceNumber())) - { - assignedAutosInCurrentLoop->set(symRef->getReferenceNumber()); - ListIterator blocksIt(blocksInLoop); - TR::Block *nextBlock; - for (nextBlock = blocksIt.getCurrent(); nextBlock; nextBlock=blocksIt.getNext()) - { - if (!rc->hasBlock(nextBlock)) - { - if (nextBlock != cfg->getStart()) - rc->addBlock(nextBlock, 0); - } - } - } - - //dumpOptDetails(comp(), "Adding %d numLoadsStores to candidate %d in block_%d\n", executionFrequency, 
rc->getSymbolReference()->getReferenceNumber(), block->getNumber()); - - if (block != cfg->getStart()) - { - // We should not consider the cost of the copies for live-range splitting, - // because this cost is meaningful only when one of operand (source or destination) receives a register. - // Note: Although we need to re-calculate the cost when one of operand receives a register, - // we do not do so currently. - // - if ((node->getOpCode().isStoreDirect() && isSplittingCopy(node)) || - (node->getOpCode().isLoadVarDirect() && parent && parent->getOpCode().isStoreDirect() && isSplittingCopy(parent))) - { - rc->addBlock(block, 0); - } - else - { - if (comp()->target().cpu.isZ() && - rc->getSymbolReference()->getSymbol()->getDataType() == TR::Address && - parent && - (((parent->getOpCode().isStoreIndirect() || - parent->getOpCode().isLoadIndirect()) && - parent->getFirstChild() == node) || - (parent->getOpCode().isAdd() && - grandParent && - (grandParent->getOpCode().isStoreIndirect() || - grandParent->getOpCode().isLoadIndirect())))) - { - rc->addBlock(block, executionFrequency*10); - if (trace()) - dumpOptDetails(comp(), "Increased weight of candidate #%d in block_%d to reduce AGI\n", rc->getSymbolReference()->getReferenceNumber(), block->getNumber()); - } - else - { - rc->addBlock(block, executionFrequency); - } - } - } - - if (((node->getReferenceCount() > 1) || - node->getOpCode().isStoreDirect()) && - symsThatShouldNotBeAssignedInCurrentLoop) - symsThatShouldNotBeAssignedInCurrentLoop->reset(symRef->getReferenceNumber()); - - if (symsThatShouldNotBeAssignedInCurrentLoop && - node->getOpCode().isLoadVarDirect()) - { - if (parent->getNumChildren() == 2) - { - if (parent->getSecondChild()->getOpCode().isLoadConst()) - symsThatShouldNotBeAssignedInCurrentLoop->reset(symRef->getReferenceNumber()); - else - { - TR::Node *otherChild; - if (parent->getFirstChild() == node) - otherChild = parent->getSecondChild(); - else - otherChild = parent->getFirstChild(); - - if 
(otherChild->getOpCode().isLoadVarDirect() && - (otherChild->getSymbolReference()->getSymbol()->isAutoOrParm()) - ) - findOrCreatePairedSymbols(symRef, otherChild->getSymbolReference()); - } - } - } - } - } - - int32_t childNum; - for (childNum=0;childNum<node->getNumChildren();childNum++) - { - TR::Node *oldArrayAccess = *currentArrayAccess; - markAutosUsedIn(node->getChild(childNum), node, parent, currentArrayAccess, block, blocksInLoop, visitCount, executionFrequency, registerCandidates, assignedAutosInCurrentLoop, symsThatShouldNotBeAssignedInCurrentLoop, hasCatchBlock); - *currentArrayAccess = oldArrayAccess; - } - - TR::ILOpCode &opCode = node->getOpCode(); - TR::ILOpCodes opCodeValue = opCode.getOpCodeValue(); - - if (symsThatShouldNotBeAssignedInCurrentLoop && - assignedAutosInCurrentLoop && - ((opCode.hasSymbolReference() && - opCode.isIndirect()) || - (opCode.isArrayLength()) || - !(opCode.isAdd() || opCode.isSub() || opCode.isMul() || - opCode.isBooleanCompare() || opCode.isNullCheck() || opCode.isBndCheck()))) - findSymsUsedInIndirectAccesses(node, symsThatShouldNotBeAssignedInCurrentLoop, assignedAutosInCurrentLoop, true); - } - - void TR_GlobalRegisterAllocator::signExtendAllDefNodes(TR::Node *defNode, List *defNodes) { LexicalTimer t("TR_GlobalRegisterAllocator::signExtendAllDefNodes", comp()->phaseTimer()); diff --git a/compiler/optimizer/GlobalRegisterAllocator.hpp b/compiler/optimizer/GlobalRegisterAllocator.hpp index d0e98fb2ace..4c5c1cfaff7 100644 --- a/compiler/optimizer/GlobalRegisterAllocator.hpp +++ b/compiler/optimizer/GlobalRegisterAllocator.hpp @@ -169,16 +169,12 @@ class TR_GlobalRegisterAllocator : public TR::Optimization private: - void findIfThenRegisterCandidates(); - void findLoopsAndCorrespondingAutos(TR_StructureSubGraphNode *, vcount_t, SymRefCandidateMap &); void findLoopsAndAutosNoStructureInfo(vcount_t visitCount, TR::RegisterCandidate **registerCandidates); void initializeControlFlowInfo(); - virtual void markAutosUsedIn(TR::Node *, 
TR::Node *, TR::Node *, TR::Node **, TR::Block *, List *, vcount_t, int32_t, SymRefCandidateMap &, TR_BitVector *, TR_BitVector *, bool); void signExtendAllDefNodes(TR::Node *, List *); void findSymsUsedInIndirectAccesses(TR::Node *, TR_BitVector *, TR_BitVector *, bool); void offerAllAutosAndRegisterParmAsCandidates(TR::Block **, int32_t, bool onlySelectedCandidates = false); - void offerAllFPAutosAndParmsAsCandidates(TR::Block **, int32_t); bool allocateForSymRef(TR::SymbolReference *symRef); bool allocateForType(TR::DataType dt); @@ -223,8 +219,7 @@ class TR_GlobalRegisterAllocator : public TR::Optimization void appendStoreToBlock(TR::SymbolReference *storeSymRef, TR::SymbolReference *loadSymRef, TR::Block *block, TR::Node *node); */ protected: - void findLoopAutoRegisterCandidates(); - TR::Block * createNewSuccessorBlock(TR::Block *, TR::Block *, TR::TreeTop *, TR::Node *, TR::RegisterCandidate * rc); + TR::Block * createNewSuccessorBlock(TR::Block *, TR::Block *, TR::TreeTop *, TR::Node *, TR::RegisterCandidate * rc); void appendGotoBlock(TR::Block *gotoBlock, TR::Block *curBlock); void transformBlock(TR::TreeTop *); bool isTypeAvailable(TR::SymbolReference *symref); diff --git a/compiler/optimizer/OMRRegisterCandidate.cpp b/compiler/optimizer/OMRRegisterCandidate.cpp index 4507c92e68d..4743027fedc 100644 --- a/compiler/optimizer/OMRRegisterCandidate.cpp +++ b/compiler/optimizer/OMRRegisterCandidate.cpp @@ -106,32 +106,49 @@ void OMR::GlobalSet::collectBlocks() TR_BitVectorIterator bvi(references); TR::NodeChecklist visited(_comp); - for (TR::CFGNode *node = _comp->getFlowGraph()->getFirstNode(); node; node = node->getNext()) + TR::TreeTop *startTree = _comp->getStartTree(); + TR::TreeTop *endTree = NULL; + TR::TreeTop *treeTop; + TR::TreeTop *exitTreeTop = startTree->getExtendedBlockExitTreeTop(); + // Process each block in treetop order + // + for (treeTop = startTree; (treeTop != endTree); treeTop = exitTreeTop->getNextTreeTop()) { - TR::Block *block = 
toBlock(node); - if (!block) - continue; + TR::Block *block = NULL; + exitTreeTop = treeTop->getExtendedBlockExitTreeTop(); // Collect all autos/parms used in this block - references.empty(); visited.remove(visited); - for (TR::TreeTop * tt = block->getEntry(); tt && tt != block->getExit(); tt = tt->getNextTreeTop()) + for (TR::TreeTop * tt = treeTop; tt && tt != exitTreeTop; tt = tt->getNextTreeTop()) + { + if (tt->getNode()->getOpCodeValue() == TR::BBStart) + { + references.empty(); + block = tt->getNode()->getBlock(); + } + collectReferencedAutoSymRefs(tt->getNode(), references, visited); - // Set this block as referencing the collected autos/params - // Also set any blocks that extend this one - bvi.setBitVector(references); - while (bvi.hasMoreElements()) - { - uint32_t symRefNum = bvi.getNextElement(); - auto lookup = _blocksPerAuto.find(symRefNum); - if (lookup != _blocksPerAuto.end()) - lookup->second->set(block->getNumber()); - else + if (tt->getNextTreeTop()->getNode()->getOpCodeValue() == TR::BBEnd) { - TR_BitVector *blocks = new (_region) TR_BitVector(_region); - blocks->set(block->getNumber()); - _blocksPerAuto[symRefNum] = blocks; + // Set this block as referencing the collected autos/params + // + bvi.setBitVector(references); + while (bvi.hasMoreElements()) + { + uint32_t symRefNum = bvi.getNextElement(); + auto lookup = _blocksPerAuto.find(symRefNum); + if (lookup != _blocksPerAuto.end()) + { + lookup->second->set(block->getNumber()); + } + else + { + TR_BitVector *blocks = new (_region) TR_BitVector(_region); + blocks->set(block->getNumber()); + _blocksPerAuto[symRefNum] = blocks; + } + } } } }