From 01391ea98ee4a4d3e63f561a78b4e9f42a4981c7 Mon Sep 17 00:00:00 2001 From: Spencer-Comin Date: Thu, 15 Jun 2023 11:28:06 -0400 Subject: [PATCH 1/3] Disable bu2i with i2b child removal for signed arraycmp When the ArrayCmpSign flag is set on an arraycmp node it can return an int -1, which will not fit in an unsigned byte value. Signed-off-by: Spencer Comin --- compiler/optimizer/OMRSimplifierHandlers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/optimizer/OMRSimplifierHandlers.cpp b/compiler/optimizer/OMRSimplifierHandlers.cpp index 711e17aa6e2..8d14d0ee8bc 100644 --- a/compiler/optimizer/OMRSimplifierHandlers.cpp +++ b/compiler/optimizer/OMRSimplifierHandlers.cpp @@ -12464,7 +12464,7 @@ TR::Node *bu2iSimplifier(TR::Node * node, TR::Block * block, TR::Simplifier * s) } else if (firstChild->getOpCodeValue() == TR::i2b && (firstChild->getFirstChild()->getOpCodeValue() == TR::butest || - firstChild->getFirstChild()->getOpCodeValue() == TR::arraycmp || + (firstChild->getFirstChild()->getOpCodeValue() == TR::arraycmp && !firstChild->getFirstChild()->isArrayCmpSign()) || firstChild->getFirstChild()->getOpCodeValue() == TR::icmpeq || firstChild->getFirstChild()->getOpCodeValue() == TR::lcmpeq || firstChild->getFirstChild()->getOpCodeValue() == TR::icmpne || From d2050b18e3fd68bbf45e891dccc34162b7b897fe Mon Sep 17 00:00:00 2001 From: Spencer-Comin Date: Thu, 15 Jun 2023 11:26:39 -0400 Subject: [PATCH 2/3] Create arraycmplen opcode The arraycmplen opcode takes on the functionality of the arraycmp opcode with the arrayCmpLen flag set. Instead of creating an arraycmp node and setting the arrayCmpLen flag, we can simply create an arraycmplen node. This also makes the arrayCmpLen flag unnecessary, so it is removed. 
Signed-off-by: Spencer Comin --- compiler/aarch64/codegen/OMRCodeGenerator.cpp | 5 + compiler/aarch64/codegen/OMRTreeEvaluator.cpp | 17 +- compiler/aarch64/codegen/OMRTreeEvaluator.hpp | 1 + compiler/arm/codegen/OMRTreeEvaluator.cpp | 6 + compiler/arm/codegen/OMRTreeEvaluator.hpp | 1 + compiler/codegen/OMRCodeGenerator.hpp | 5 +- compiler/compile/OMRNonHelperSymbols.enum | 1 + compiler/compile/OMRSymbolReferenceTable.cpp | 13 + compiler/compile/OMRSymbolReferenceTable.hpp | 2 + compiler/il/Aliases.cpp | 3 +- compiler/il/OMRILOps.hpp | 20 +- compiler/il/OMRNode.cpp | 23 +- compiler/il/OMRNode.hpp | 5 - compiler/il/OMROpcodes.enum | 20 +- compiler/optimizer/DeadTreesElimination.cpp | 1 - .../optimizer/IsolatedStoreElimination.cpp | 1 + compiler/optimizer/LoopReducer.cpp | 1 + compiler/optimizer/OMRSimplifierTable.enum | 1 + compiler/optimizer/SinkStores.cpp | 2 + compiler/optimizer/ValuePropagationTable.hpp | 1 + compiler/p/codegen/OMRCodeGenerator.cpp | 1 + compiler/p/codegen/OMRTreeEvaluator.cpp | 25 +- compiler/p/codegen/OMRTreeEvaluator.hpp | 1 + compiler/ras/Debug.cpp | 3 +- compiler/riscv/codegen/OMRTreeEvaluator.cpp | 7 + compiler/riscv/codegen/OMRTreeEvaluator.hpp | 1 + compiler/x/codegen/OMRCodeGenerator.cpp | 1 + compiler/x/codegen/OMRTreeEvaluator.cpp | 11 +- compiler/x/codegen/OMRTreeEvaluator.hpp | 1 + compiler/x/i386/codegen/OMRCodeGenerator.cpp | 2 + compiler/z/codegen/OMRCodeGenerator.cpp | 1 + compiler/z/codegen/OMRTreeEvaluator.cpp | 224 +++++++++--------- compiler/z/codegen/OMRTreeEvaluator.hpp | 4 +- fvtest/compilertriltest/ArrayTest.cpp | 212 +++++++++-------- 34 files changed, 366 insertions(+), 257 deletions(-) diff --git a/compiler/aarch64/codegen/OMRCodeGenerator.cpp b/compiler/aarch64/codegen/OMRCodeGenerator.cpp index cbd8098aa07..d339c8e5b94 100644 --- a/compiler/aarch64/codegen/OMRCodeGenerator.cpp +++ b/compiler/aarch64/codegen/OMRCodeGenerator.cpp @@ -181,6 +181,11 @@ OMR::ARM64::CodeGenerator::initialize() { cg->setSupportsArrayCmp(); 
} + static const bool disableArrayCmpLen = feGetEnv("TR_aarch64DisableArrayCmpLen") != NULL; + if (!disableArrayCmpLen) + { + cg->setSupportsArrayCmpLen(); + } } if (!comp->getOption(TR_DisableArraySetOpts)) { diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp index 35540da350a..0e8b3aa97b8 100644 --- a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp +++ b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp @@ -6758,8 +6758,8 @@ OMR::ARM64::TreeEvaluator::arraysetEvaluator(TR::Node *node, TR::CodeGenerator * return NULL; } -TR::Register * -OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) +static TR::Register * +arraycmpEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLen) { /* * Generating following instruction sequence @@ -6865,7 +6865,6 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * TR::Node *src2Node = node->getSecondChild(); TR::Node *lengthNode = node->getThirdChild(); bool isLengthGreaterThan15 = lengthNode->getOpCode().isLoadConst() && lengthNode->getConstValue() > 15; - const bool isArrayCmpLen = node->isArrayCmpLen(); TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager(12); TR::Register *savedSrc1Reg = cg->evaluate(src1Node); @@ -7113,6 +7112,18 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * return resultReg; } +TR::Register * +OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + return arraycmpEvaluatorHelper(node, cg, false); + } + +TR::Register * +OMR::ARM64::TreeEvaluator::arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + return arraycmpEvaluatorHelper(node, cg, true); + } + static void inlineConstantLengthForwardArrayCopy(TR::Node *node, int64_t byteLen, TR::Register *srcReg, TR::Register *dstReg, TR::CodeGenerator *cg) { diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.hpp 
b/compiler/aarch64/codegen/OMRTreeEvaluator.hpp index a46c886faa8..58056973574 100644 --- a/compiler/aarch64/codegen/OMRTreeEvaluator.hpp +++ b/compiler/aarch64/codegen/OMRTreeEvaluator.hpp @@ -833,6 +833,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateAndTestEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *computeCCEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *butestEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *sutestEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/arm/codegen/OMRTreeEvaluator.cpp b/compiler/arm/codegen/OMRTreeEvaluator.cpp index 726c1fe2110..fd82a73966e 100644 --- a/compiler/arm/codegen/OMRTreeEvaluator.cpp +++ b/compiler/arm/codegen/OMRTreeEvaluator.cpp @@ -3612,6 +3612,12 @@ TR::Register *OMR::ARM::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::Cod return NULL; } +TR::Register *OMR::ARM::TreeEvaluator::arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + TR_UNIMPLEMENTED(); + return NULL; + } + bool OMR::ARM::TreeEvaluator::stopUsingCopyReg( TR::Node* node, TR::Register*& reg, diff --git a/compiler/arm/codegen/OMRTreeEvaluator.hpp b/compiler/arm/codegen/OMRTreeEvaluator.hpp index 49cb9391183..25c59cce12b 100644 --- a/compiler/arm/codegen/OMRTreeEvaluator.hpp +++ b/compiler/arm/codegen/OMRTreeEvaluator.hpp @@ -762,6 +762,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *arraytranslateAndTestEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); 
+ static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *BBStartEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *BBEndEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *commonLoadEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic memoryToRegisterOp, int32_t memSize, TR::CodeGenerator *cg); diff --git a/compiler/codegen/OMRCodeGenerator.hpp b/compiler/codegen/OMRCodeGenerator.hpp index a32aea0c8d5..ab1bce839d2 100644 --- a/compiler/codegen/OMRCodeGenerator.hpp +++ b/compiler/codegen/OMRCodeGenerator.hpp @@ -1631,6 +1631,9 @@ class OMR_EXTENSIBLE CodeGenerator bool getSupportsArrayCmp() {return _flags1.testAny(SupportsArrayCmp);} void setSupportsArrayCmp() {_flags1.set(SupportsArrayCmp);} + bool getSupportsArrayCmpLen() {return _flags1.testAny(SupportsArrayCmpLen);} + void setSupportsArrayCmpLen() {_flags1.set(SupportsArrayCmpLen);} + bool getSupportsArrayCmpSign() {return _flags3.testAny(SupportsArrayCmpSign);} void setSupportsArrayCmpSign() {_flags3.set(SupportsArrayCmpSign);} @@ -1828,7 +1831,7 @@ class OMR_EXTENSIBLE CodeGenerator // AVAILABLE = 0x02000000, UsesRegisterPairsForLongs = 0x04000000, SupportsArraySet = 0x08000000, - // AVAILABLE = 0x10000000, + SupportsArrayCmpLen = 0x10000000, SupportsArrayCmp = 0x20000000, DisableLongGRA = 0x40000000, DummyLastEnum1 diff --git a/compiler/compile/OMRNonHelperSymbols.enum b/compiler/compile/OMRNonHelperSymbols.enum index efacf119412..e71321980d7 100644 --- a/compiler/compile/OMRNonHelperSymbols.enum +++ b/compiler/compile/OMRNonHelperSymbols.enum @@ -27,6 +27,7 @@ arraySetSymbol, arrayCopySymbol, arrayCmpSymbol, + arrayCmpLenSymbol, prefetchSymbol, killsAllMethodSymbol, // A dummy method whose alias set includes all diff --git a/compiler/compile/OMRSymbolReferenceTable.cpp b/compiler/compile/OMRSymbolReferenceTable.cpp index 7f302041fbe..2d46ecda690 100644 --- a/compiler/compile/OMRSymbolReferenceTable.cpp +++ 
b/compiler/compile/OMRSymbolReferenceTable.cpp @@ -663,6 +663,19 @@ OMR::SymbolReferenceTable::findOrCreateArrayCmpSymbol() return element(arrayCmpSymbol); } +TR::SymbolReference * +OMR::SymbolReferenceTable::findOrCreateArrayCmpLenSymbol() + { + if (!element(arrayCmpLenSymbol)) + { + TR::MethodSymbol * sym = TR::MethodSymbol::create(trHeapMemory(),TR_Helper); + sym->setHelper(); + + element(arrayCmpLenSymbol) = new (trHeapMemory()) TR::SymbolReference(self(), arrayCmpLenSymbol, sym); + } + return element(arrayCmpLenSymbol); + } + TR::SymbolReference * OMR::SymbolReferenceTable::findOrCreateCurrentTimeMaxPrecisionSymbol() { diff --git a/compiler/compile/OMRSymbolReferenceTable.hpp b/compiler/compile/OMRSymbolReferenceTable.hpp index f60641f29c7..46e62ce8d72 100644 --- a/compiler/compile/OMRSymbolReferenceTable.hpp +++ b/compiler/compile/OMRSymbolReferenceTable.hpp @@ -88,6 +88,7 @@ class SymbolReferenceTable arraySetSymbol, arrayCopySymbol, arrayCmpSymbol, + arrayCmpLenSymbol, prefetchSymbol, killsAllMethodSymbol, // A dummy method whose alias set includes all @@ -844,6 +845,7 @@ class SymbolReferenceTable TR::SymbolReference * findOrCreateArrayCopySymbol(); TR::SymbolReference * findOrCreateArraySetSymbol(); TR::SymbolReference * findOrCreateArrayCmpSymbol(); + TR::SymbolReference * findOrCreateArrayCmpLenSymbol(); TR::SymbolReference * findOrCreateClassSymbol(TR::ResolvedMethodSymbol * owningMethodSymbol, int32_t cpIndex, void * classObject, bool cpIndexOfStatic = false); TR::SymbolReference * findOrCreateArrayShadowSymbolRef(TR::DataType type, TR::Node * baseArrayAddress, int32_t size, TR_FrontEnd * fe); diff --git a/compiler/il/Aliases.cpp b/compiler/il/Aliases.cpp index 61ccd646994..d9add540866 100644 --- a/compiler/il/Aliases.cpp +++ b/compiler/il/Aliases.cpp @@ -351,7 +351,8 @@ OMR::SymbolReference::getUseDefAliasesBV(bool isDirectCall, bool includeGCSafePo return &symRefTab->aliasBuilder.defaultMethodDefAliases(); } - if (symRefTab->isNonHelper(self(), 
TR::SymbolReferenceTable::arrayCmpSymbol)) + if (symRefTab->isNonHelper(self(), TR::SymbolReferenceTable::arrayCmpSymbol) || + symRefTab->isNonHelper(self(), TR::SymbolReferenceTable::arrayCmpLenSymbol)) return 0; switch (self()->getReferenceNumber()) diff --git a/compiler/il/OMRILOps.hpp b/compiler/il/OMRILOps.hpp index e67ebe9a7fa..37fa12173cb 100644 --- a/compiler/il/OMRILOps.hpp +++ b/compiler/il/OMRILOps.hpp @@ -558,11 +558,12 @@ class ILOpCode bool isFunctionCall() const { - return isCall() && - getOpCodeValue() != TR::arraycopy && - getOpCodeValue() != TR::arrayset && - getOpCodeValue() != TR::bitOpMem && - getOpCodeValue() != TR::arraycmp; + return isCall() && + getOpCodeValue() != TR::arraycopy && + getOpCodeValue() != TR::arrayset && + getOpCodeValue() != TR::bitOpMem && + getOpCodeValue() != TR::arraycmp && + getOpCodeValue() != TR::arraycmplen; } bool isCompareDouble() @@ -637,10 +638,11 @@ class ILOpCode bool isMemToMemOp() { - return getOpCodeValue() == TR::bitOpMem || - getOpCodeValue() == TR::arrayset || - getOpCodeValue() == TR::arraycmp || - getOpCodeValue() == TR::arraycopy; + return getOpCodeValue() == TR::bitOpMem || + getOpCodeValue() == TR::arrayset || + getOpCodeValue() == TR::arraycmp || + getOpCodeValue() == TR::arraycopy || + getOpCodeValue() == TR::arraycmplen; } static TR::ILOpCodes getDataTypeConversion(TR::DataType t1, TR::DataType t2); diff --git a/compiler/il/OMRNode.cpp b/compiler/il/OMRNode.cpp index 370393cb01d..87d13fa7dc7 100644 --- a/compiler/il/OMRNode.cpp +++ b/compiler/il/OMRNode.cpp @@ -3649,6 +3649,7 @@ OMR::Node::exceptionsRaised() possibleExceptions |= TR::Block:: CanCatchBoundCheck; break; case TR::arraycmp: // does not throw any exceptions + case TR::arraycmplen: break; case TR::checkcast: possibleExceptions |= TR::Block:: CanCatchCheckCast; @@ -6111,28 +6112,6 @@ OMR::Node::chkTableBackedByRawStorage() return self()->getOpCodeValue() == TR::arraytranslate && _flags.testAny(tableBackedByRawStorage); } -bool 
-OMR::Node::isArrayCmpLen() - { - TR_ASSERT(self()->getOpCodeValue() == TR::arraycmp, "Opcode must be arraycmp"); - return _flags.testAny(arrayCmpLen); - } - -void -OMR::Node::setArrayCmpLen(bool v) - { - TR::Compilation *c = TR::comp(); - TR_ASSERT(self()->getOpCodeValue() == TR::arraycmp, "Opcode must be arraycmp"); - if (performNodeTransformation2(c, "O^O NODE FLAGS: Setting arrayCmpLen flag on node %p to %d\n", self(), v)) - _flags.set(arrayCmpLen, v); - } - -bool -OMR::Node::chkArrayCmpLen() - { - return self()->getOpCodeValue() == TR::arraycmp && _flags.testAny(arrayCmpLen); - } - bool OMR::Node::isArrayCmpSign() { diff --git a/compiler/il/OMRNode.hpp b/compiler/il/OMRNode.hpp index 5da70fbf591..bdc421b74cf 100644 --- a/compiler/il/OMRNode.hpp +++ b/compiler/il/OMRNode.hpp @@ -1191,10 +1191,6 @@ class OMR_EXTENSIBLE Node bool chkTableBackedByRawStorage(); // Flags used by TR::arraycmp - bool isArrayCmpLen(); - void setArrayCmpLen(bool v); - bool chkArrayCmpLen(); - bool isArrayCmpSign(); void setArrayCmpSign(bool v); bool chkArrayCmpSign(); @@ -1851,7 +1847,6 @@ class OMR_EXTENSIBLE Node tableBackedByRawStorage = 0x00008000, // Flags used by TR::arraycmp - arrayCmpLen = 0x00008000, arrayCmpSign = 0x00004000, // Flags used by TR::arraycopy diff --git a/compiler/il/OMROpcodes.enum b/compiler/il/OMROpcodes.enum index dd714fb9a4a..2dba4d20fa3 100644 --- a/compiler/il/OMROpcodes.enum +++ b/compiler/il/OMROpcodes.enum @@ -7201,7 +7201,25 @@ OPCODE_MACRO(\ /* .reverseBranchOpCode = */ TR::BadILOp, \ /* .booleanCompareOpCode = */ TR::BadILOp, \ /* .ifCompareOpCode = */ TR::BadILOp, \ - /* .description = Inline code for memory comparison of part of an array */ \ + /* .description = Compare two blocks of memory and return a lexical ordering constant. */ \ + /* The constant indicates whether the first child is lesser, equal, or greater than the second child. 
*/ \ + /* When the arrayCmpSign flag is set, the lesser/equal/greater constants are -1/0/1, otherwise the constants are 1/0/2 */ \ +) +OPCODE_MACRO(\ + /* .opcode = */ arraycmplen, \ + /* .name = */ "arraycmplen", \ + /* .properties1 = */ ILProp1::Call | ILProp1::HasSymbolRef, \ + /* .properties2 = */ 0, \ + /* .properties3 = */ ILProp3::LikeUse | ILProp3::LikeDef | ILProp3::SkipDynamicLitPoolOnInts, \ + /* .properties4 = */ 0, \ + /* .dataType = */ TR::Int32, \ + /* .typeProperties = */ ILTypeProp::Size_4 | ILTypeProp::Integer, \ + /* .childProperties = */ THREE_CHILD(TR::Address, TR::Address, TR::Int32), \ + /* .swapChildrenOpCode = */ TR::BadILOp, \ + /* .reverseBranchOpCode = */ TR::BadILOp, \ + /* .booleanCompareOpCode = */ TR::BadILOp, \ + /* .ifCompareOpCode = */ TR::BadILOp, \ + /* .description = Compare two blocks of memory and return the index of the first mismatched byte */ \ ) OPCODE_MACRO(\ /* .opcode = */ allocationFence, \ diff --git a/compiler/optimizer/DeadTreesElimination.cpp b/compiler/optimizer/DeadTreesElimination.cpp index 125a1710b97..c6c371cd229 100644 --- a/compiler/optimizer/DeadTreesElimination.cpp +++ b/compiler/optimizer/DeadTreesElimination.cpp @@ -672,7 +672,6 @@ bool TR::DeadTreesElimination::fixUpTree(TR::Node *node, TR::TreeTop *treeTop, T // for arraycmp node, don't create its tree top anchor // fold it into if statment and save jump instruction if (node->getOpCodeValue() == TR::arraycmp && - !node->isArrayCmpLen() && comp()->target().cpu.isX86()) { anchorArrayCmp = false; diff --git a/compiler/optimizer/IsolatedStoreElimination.cpp b/compiler/optimizer/IsolatedStoreElimination.cpp index 2fd4735bba1..3b55dd8aad7 100644 --- a/compiler/optimizer/IsolatedStoreElimination.cpp +++ b/compiler/optimizer/IsolatedStoreElimination.cpp @@ -1628,6 +1628,7 @@ nodeHasSideEffect(TR::Node *node) { case TR::arrayset: case TR::arraycmp: + case TR::arraycmplen: case TR::arraytranslate: case TR::arraytranslateAndTest: case TR::long2String: diff 
--git a/compiler/optimizer/LoopReducer.cpp b/compiler/optimizer/LoopReducer.cpp index 17791357cbf..aca1f3b1fc7 100644 --- a/compiler/optimizer/LoopReducer.cpp +++ b/compiler/optimizer/LoopReducer.cpp @@ -4429,6 +4429,7 @@ TR_LoopReducer::perform() !comp()->cg()->getSupportsReferenceArrayCopy() && !comp()->cg()->getSupportsPrimitiveArrayCopy() && !comp()->cg()->getSupportsArrayCmp() && + !comp()->cg()->getSupportsArrayCmpLen() && !comp()->cg()->getSupportsArrayTranslateTRxx() && !comp()->cg()->getSupportsArrayTranslateAndTest()) { diff --git a/compiler/optimizer/OMRSimplifierTable.enum b/compiler/optimizer/OMRSimplifierTable.enum index 94751d7c043..9cce407c88c 100644 --- a/compiler/optimizer/OMRSimplifierTable.enum +++ b/compiler/optimizer/OMRSimplifierTable.enum @@ -611,6 +611,7 @@ #define long2StringSimplifierHandler dftSimplifier #define bitOpMemSimplifierHandler bitOpMemSimplifier #define arraycmpSimplifierHandler dftSimplifier +#define arraycmplenSimplifierHandler dftSimplifier #define allocationFenceSimplifierHandler dftSimplifier #define loadFenceSimplifierHandler dftSimplifier #define storeFenceSimplifierHandler dftSimplifier diff --git a/compiler/optimizer/SinkStores.cpp b/compiler/optimizer/SinkStores.cpp index 5f3099ff09f..95674bcca1f 100644 --- a/compiler/optimizer/SinkStores.cpp +++ b/compiler/optimizer/SinkStores.cpp @@ -1823,6 +1823,8 @@ bool TR_SinkStores::treeIsSinkableStore(TR::Node *node, bool sinkIndirectLoads, { if (node->getOpCodeValue() == TR::arraycmp) traceMsg(comp()," *arraycmp is a call %d, raises exceptions %d*\n",node->getOpCode().isCall(),node->exceptionsRaised()); + else if (node->getOpCodeValue() == TR::arraycmplen) + traceMsg(comp()," *arraycmplen is a call %d, raises exceptions %d*\n",node->getOpCode().isCall(),node->exceptionsRaised()); else if (node->getOpCodeValue() == TR::arraycopy) traceMsg(comp()," *arraycopy is a call %d, raises exceptions %d*\n",node->getOpCode().isCall(),node->exceptionsRaised()); traceMsg(comp(), " *store 
is a call or an excepting node*\n"); diff --git a/compiler/optimizer/ValuePropagationTable.hpp b/compiler/optimizer/ValuePropagationTable.hpp index 81ea86321f3..54d6ec16c4c 100644 --- a/compiler/optimizer/ValuePropagationTable.hpp +++ b/compiler/optimizer/ValuePropagationTable.hpp @@ -749,6 +749,7 @@ TR::Node * constrainLongBitCount(OMR::ValuePropagation *vp, TR::Node *node); #define long2StringVPHandler constrainChildren #define bitOpMemVPHandler constrainChildren #define arraycmpVPHandler constrainChildren +#define arraycmplenVPHandler constrainChildren #define allocationFenceVPHandler constrainChildren #define loadFenceVPHandler constrainChildren #define storeFenceVPHandler constrainChildren diff --git a/compiler/p/codegen/OMRCodeGenerator.cpp b/compiler/p/codegen/OMRCodeGenerator.cpp index 1c232945158..c11276c1ce2 100644 --- a/compiler/p/codegen/OMRCodeGenerator.cpp +++ b/compiler/p/codegen/OMRCodeGenerator.cpp @@ -226,6 +226,7 @@ OMR::Power::CodeGenerator::initialize() cg->setSupportsArraySet(); } cg->setSupportsArrayCmp(); + cg->setSupportsArrayCmpLen(); if (comp->target().cpu.supportsFeature(OMR_FEATURE_PPC_HAS_VSX)) { diff --git a/compiler/p/codegen/OMRTreeEvaluator.cpp b/compiler/p/codegen/OMRTreeEvaluator.cpp index da37a2d012f..ae82f19b12c 100644 --- a/compiler/p/codegen/OMRTreeEvaluator.cpp +++ b/compiler/p/codegen/OMRTreeEvaluator.cpp @@ -5446,7 +5446,7 @@ static inline void loadArrayCmpSources(TR::Node *node, TR::InstOpCode::Mnemonic } } -static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) +static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLen) { TR::Node *src1AddrNode = node->getChild(0); TR::Node *src2AddrNode = node->getChild(1); @@ -5511,7 +5511,7 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) generateTrg1Src1Instruction(cg, TR::InstOpCode::vclzlsbb, node, tempReg, vec0Reg); - if (!node->isArrayCmpLen()) + if (!isArrayCmpLen) { 
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, node, returnReg, returnReg, -1); } @@ -5523,7 +5523,7 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) // index = index + offset, if we need to return unmatched index, then we are done here generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, returnReg, indexReg, returnReg); - if (!node->isArrayCmpLen()) + if (!isArrayCmpLen) { generateTrg1Src2Instruction(cg, TR::InstOpCode::lbzx, node, tempReg, returnReg, src1AddrReg); generateTrg1Src2Instruction(cg, TR::InstOpCode::lbzx, node, indexReg, returnReg, src2AddrReg); @@ -5566,8 +5566,12 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) } -static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) +static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLen) { + static char *disableP10ArrayCmp = feGetEnv("TR_DisableP10ArrayCmp"); + if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10) && (disableP10ArrayCmp == NULL)) + return inlineArrayCmpP10(node, cg, isArrayCmpLen); + TR::Node *src1AddrNode = node->getChild(0); TR::Node *src2AddrNode = node->getChild(1); TR::Node *lengthNode = node->getChild(2); @@ -5691,7 +5695,7 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) generateLabelInstruction(cg, TR::InstOpCode::label, node, resultLabel); - if (node->isArrayCmpLen()) + if (isArrayCmpLen) generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, ccReg, byteLenRemainingRegister, byteLenRegister); else { @@ -5747,11 +5751,12 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) TR::Register *OMR::Power::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - TR::Compilation *comp = cg->comp(); - static char *disableP10ArrayCmp = feGetEnv("TR_DisableP10ArrayCmp"); - if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10) && !disableP10ArrayCmp) - return 
inlineArrayCmpP10(node, cg); - return inlineArrayCmp(node, cg); + return inlineArrayCmp(node, cg, false); + } + +TR::Register *OMR::Power::TreeEvaluator::arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + return inlineArrayCmp(node, cg, true); } bool OMR::Power::TreeEvaluator::stopUsingCopyReg( diff --git a/compiler/p/codegen/OMRTreeEvaluator.hpp b/compiler/p/codegen/OMRTreeEvaluator.hpp index 19b8047ff59..8bdda32915b 100644 --- a/compiler/p/codegen/OMRTreeEvaluator.hpp +++ b/compiler/p/codegen/OMRTreeEvaluator.hpp @@ -708,6 +708,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateAndTestEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *compareIntsForEquality(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *compareIntsForEquality(TR::InstOpCode::Mnemonic branchOp, TR::LabelSymbol *dstLabel, TR::Node *node, TR::CodeGenerator *cg, bool isHint=false, bool likeliness=false); diff --git a/compiler/ras/Debug.cpp b/compiler/ras/Debug.cpp index c9c5eb764cb..c23657a3b5f 100644 --- a/compiler/ras/Debug.cpp +++ b/compiler/ras/Debug.cpp @@ -1053,7 +1053,6 @@ TR_Debug::nodePrintAllFlags(TR::Node *node, TR_PrettyPrinterString &output) FLAG(chkSkipZeroInitialization, "skipZeroInit"); FLAG(chkDontMoveUnderBranch, "dontMoveUnderBranch"); FLAG(chkIsPrivatizedInlinerArg, "privatizedInlinerArg"); - FLAG(chkArrayCmpLen, "arrayCmpLen"); FLAG(chkArrayCmpSign, "arrayCmpSign"); FLAG(chkXorBitOpMem, "SubOp=XOR"); FLAG(chkOrBitOpMem, "SubOp=OR"); @@ -1612,6 +1611,8 @@ TR_Debug::getName(TR::SymbolReference * symRef) return ""; case TR::SymbolReferenceTable::arrayCmpSymbol: return ""; + case TR::SymbolReferenceTable::arrayCmpLenSymbol: + return ""; case 
TR::SymbolReferenceTable::currentTimeMaxPrecisionSymbol: return ""; case TR::SymbolReferenceTable::encodeASCIISymbol: diff --git a/compiler/riscv/codegen/OMRTreeEvaluator.cpp b/compiler/riscv/codegen/OMRTreeEvaluator.cpp index e5449c4e775..6fc942814d9 100644 --- a/compiler/riscv/codegen/OMRTreeEvaluator.cpp +++ b/compiler/riscv/codegen/OMRTreeEvaluator.cpp @@ -3028,6 +3028,13 @@ OMR::RV::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); } +TR::Register * +OMR::RV::TreeEvaluator::arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + // TODO:RV: Enable TR::TreeEvaluator::arraycmplenEvaluator in compiler/riscv/codegen/TreeEvaluatorTable.hpp when implemented. + return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); + } + TR::Register * OMR::RV::TreeEvaluator::arraycopyEvaluator(TR::Node *node, TR::CodeGenerator *cg) { diff --git a/compiler/riscv/codegen/OMRTreeEvaluator.hpp b/compiler/riscv/codegen/OMRTreeEvaluator.hpp index 5f90a6e6614..6ed726321b9 100644 --- a/compiler/riscv/codegen/OMRTreeEvaluator.hpp +++ b/compiler/riscv/codegen/OMRTreeEvaluator.hpp @@ -504,6 +504,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *long2StringEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *bitOpMemEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *allocationFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *loadFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *storeFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/x/codegen/OMRCodeGenerator.cpp b/compiler/x/codegen/OMRCodeGenerator.cpp index d5606c8ea6d..e484dfc4750 100644 --- a/compiler/x/codegen/OMRCodeGenerator.cpp 
+++ b/compiler/x/codegen/OMRCodeGenerator.cpp @@ -380,6 +380,7 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp) if (!TR::Compiler->om.canGenerateArraylets()) { self()->setSupportsArrayCmp(); + self()->setSupportsArrayCmpLen(); self()->setSupportsPrimitiveArrayCopy(); if (!comp->getOption(TR_DisableArraySetOpts)) { diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index 13f5b7c2566..763f70e900b 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -1136,9 +1136,18 @@ OMR::X86::TreeEvaluator::arraycmpEvaluator( TR::Node *node, TR::CodeGenerator *cg) { - return node->isArrayCmpLen() ? TR::TreeEvaluator::SSE2ArraycmpLenEvaluator(node, cg) : TR::TreeEvaluator::SSE2ArraycmpEvaluator(node, cg); + return TR::TreeEvaluator::SSE2ArraycmpEvaluator(node, cg); } +TR::Register * +OMR::X86::TreeEvaluator::arraycmplenEvaluator( + TR::Node *node, + TR::CodeGenerator *cg) + { + return TR::TreeEvaluator::SSE2ArraycmpLenEvaluator(node, cg); + } + + TR::Register *OMR::X86::TreeEvaluator::SSE2ArraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) { TR::Node *s1AddrNode = node->getChild(0); diff --git a/compiler/x/codegen/OMRTreeEvaluator.hpp b/compiler/x/codegen/OMRTreeEvaluator.hpp index d5d1f6f0ecb..72f805e1b54 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.hpp +++ b/compiler/x/codegen/OMRTreeEvaluator.hpp @@ -215,6 +215,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *arraysetEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycopyEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *overflowCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg); static 
TR::Register *ibyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/x/i386/codegen/OMRCodeGenerator.cpp b/compiler/x/i386/codegen/OMRCodeGenerator.cpp index d262b2d495b..152510f40a1 100644 --- a/compiler/x/i386/codegen/OMRCodeGenerator.cpp +++ b/compiler/x/i386/codegen/OMRCodeGenerator.cpp @@ -81,6 +81,8 @@ OMR::X86::I386::CodeGenerator::initialize() cg->setSupportsArrayTranslateAndTest(); if (debug("supportsArrayCmp")) cg->setSupportsArrayCmp(); + if (debug("supportsArrayCmpLen")) + cg->setSupportsArrayCmpLen(); cg->setSupportsDoubleWordCAS(); cg->setSupportsDoubleWordSet(); diff --git a/compiler/z/codegen/OMRCodeGenerator.cpp b/compiler/z/codegen/OMRCodeGenerator.cpp index db7f9025f38..17a529af7bd 100644 --- a/compiler/z/codegen/OMRCodeGenerator.cpp +++ b/compiler/z/codegen/OMRCodeGenerator.cpp @@ -503,6 +503,7 @@ OMR::Z::CodeGenerator::initialize() cg->setSupportsArraySet(); } cg->setSupportsArrayCmp(); + cg->setSupportsArrayCmpLen(); cg->setSupportsArrayCmpSign(); if (!comp->compileRelocatableCode()) { diff --git a/compiler/z/codegen/OMRTreeEvaluator.cpp b/compiler/z/codegen/OMRTreeEvaluator.cpp index 7a2badedd2b..613e7d8f9c3 100644 --- a/compiler/z/codegen/OMRTreeEvaluator.cpp +++ b/compiler/z/codegen/OMRTreeEvaluator.cpp @@ -11764,131 +11764,145 @@ OMR::Z::TreeEvaluator::arraycmpEvaluator(TR::Node * node, TR::CodeGenerator * cg TR::Register * firstBaseReg = NULL; TR::Register * secondBaseReg = NULL; bool lenMinusOne=false; - TR_ASSERT(!node->isArrayCmpLen() || !node->isArrayCmpSign(), "Invalid arraycmp node"); - if (!node->isArrayCmpLen()) + + // use CLC + TR::Register * resultReg; + + if (elemsExpr->getOpCode().isLoadConst()) { - // use CLC - TR::Register * resultReg; + int64_t elems = static_cast<int64_t>(getIntegralValue(elemsExpr)); //get number of elements (in bytes) + bool clobber = (comp->getOption(TR_DisableSSOpts) || elems>256 || elems==0 || node->isArrayCmpSign()); + if (!node->isArrayCmpSign()) + { + resultReg = 
TR::TreeEvaluator::arraycmpHelper( + node, + cg, + false, //isWideChar + true, //isEqualCmp + 0, //cmpValue. It means it is equivalent to do "arraycmp(A,B) == 0" + NULL, //compareTarget + NULL, //ificmpNode + true, //needResultReg + true); //return102 + // node->setRegister(resultReg); + return resultReg; + } + else + { + MemCmpConstLenSignMacroOp op(node, firstBaseAddr, secondBaseAddr, cg, elems); + ClobberRegisterForLoops(clobber,op,firstBaseAddr,firstBaseReg); + ClobberRegisterForLoops(clobber,op,secondBaseAddr,secondBaseReg); + op.generate(firstBaseReg, secondBaseReg); + resultReg = op.resultReg(); + } + } + else + { + TR::Register * elemsReg; - if (elemsExpr->getOpCode().isLoadConst()) + if (!node->isArrayCmpSign()) { - int32_t elems = (int32_t) getIntegralValue(elemsExpr); //get number of elements (in bytes) - bool clobber = (comp->getOption(TR_DisableSSOpts) || elems>256 || elems==0 || node->isArrayCmpSign()); - if (!node->isArrayCmpSign()) - { - resultReg = TR::TreeEvaluator::arraycmpHelper( - node, - cg, - false, //isWideChar - true, //isEqualCmp - 0, //cmpValue. It means it is equivalent to do "arraycmp(A,B) == 0" - NULL, //compareTarget - NULL, //ificmpNode - true, //needResultReg - true); //return102 - // node->setRegister(resultReg); - return resultReg; - } - else - { - MemCmpConstLenSignMacroOp op(node, firstBaseAddr, secondBaseAddr, cg, elems); - ClobberRegisterForLoops(clobber,op,firstBaseAddr,firstBaseReg); - ClobberRegisterForLoops(clobber,op,secondBaseAddr,secondBaseReg); - op.generate(firstBaseReg, secondBaseReg); - resultReg = op.resultReg(); - } + resultReg = TR::TreeEvaluator::arraycmpHelper( + node, + cg, + false, //isWideChar + true, //isEqualCmp + 0, //cmpValue. 
It means it is equivalent to do "arraycmp(A,B) == 0" + NULL, //compareTarget + NULL, //ificmpNode + true, //needResultReg + true); //return102 + + // node->setRegister(resultReg); + return resultReg; } else { - TR::Register * elemsReg; + elemsReg = cg->evaluateLengthMinusOneForMemoryOps(elemsExpr, true, lenMinusOne); + firstBaseReg = cg->gprClobberEvaluate(firstBaseAddr); + secondBaseReg = cg->gprClobberEvaluate(secondBaseAddr); - if (!node->isArrayCmpSign()) - { - resultReg = TR::TreeEvaluator::arraycmpHelper( - node, - cg, - false, //isWideChar - true, //isEqualCmp - 0, //cmpValue. It means it is equivalent to do "arraycmp(A,B) == 0" - NULL, //compareTarget - NULL, //ificmpNode - true, //needResultReg - true); //return102 - - // node->setRegister(resultReg); - return resultReg; - } - else - { - elemsReg = cg->evaluateLengthMinusOneForMemoryOps(elemsExpr, true, lenMinusOne); - firstBaseReg = cg->gprClobberEvaluate(firstBaseAddr); - secondBaseReg = cg->gprClobberEvaluate(secondBaseAddr); - - MemCmpVarLenSignMacroOp op(node, firstBaseAddr, secondBaseAddr, cg, elemsReg, elemsExpr); - op.generate(firstBaseReg, secondBaseReg); - resultReg = op.resultReg(); - cg->stopUsingRegister(elemsReg); - } + MemCmpVarLenSignMacroOp op(node, firstBaseAddr, secondBaseAddr, cg, elemsReg, elemsExpr); + op.generate(firstBaseReg, secondBaseReg); + resultReg = op.resultReg(); + cg->stopUsingRegister(elemsReg); } + } - cg->decReferenceCount(elemsExpr); - if (firstBaseReg!=NULL) cg->decReferenceCount(firstBaseAddr); - if (secondBaseReg!=NULL) cg->decReferenceCount(secondBaseAddr); + cg->decReferenceCount(elemsExpr); + if (firstBaseReg!=NULL) cg->decReferenceCount(firstBaseAddr); + if (secondBaseReg!=NULL) cg->decReferenceCount(secondBaseAddr); - if (firstBaseReg!=NULL) cg->stopUsingRegister(firstBaseReg); - if (secondBaseReg!=NULL) cg->stopUsingRegister(secondBaseReg); + if (firstBaseReg!=NULL) cg->stopUsingRegister(firstBaseReg); + if (secondBaseReg!=NULL) 
cg->stopUsingRegister(secondBaseReg); - TR_ASSERT( resultReg!=firstBaseReg && resultReg!=secondBaseReg, "arraycmpEvaluator -- result reg should be a new reg\n"); + TR_ASSERT( resultReg!=firstBaseReg && resultReg!=secondBaseReg, "arraycmpEvaluator -- result reg should be a new reg\n"); - node->setRegister(resultReg); - return resultReg; - } - else + node->setRegister(resultReg); + return resultReg; + } + +TR::Register * +OMR::Z::TreeEvaluator::arraycmplenEvaluator(TR::Node * node, TR::CodeGenerator * cg) + { + TR::Compilation *comp = cg->comp(); + + if (TR::isJ9() && !comp->getOption(TR_DisableSIMDArrayCompare) && cg->getSupportsVectorRegisters()) { - // use CLCL instruction - firstBaseReg = cg->gprClobberEvaluate(firstBaseAddr); - secondBaseReg = cg->gprClobberEvaluate(secondBaseAddr); + // An empirical study has shown that CLC is faster for all array sizes if the number of bytes to compare is known to be constant + if (!node->getChild(2)->getOpCode().isLoadConst()) + return TR::TreeEvaluator::arraycmpSIMDHelper(node, cg, NULL, NULL, true, !node->isArrayCmpSign()/*return102*/, true); + } + + TR::Node * firstBaseAddr = node->getFirstChild(); + TR::Node * secondBaseAddr = node->getSecondChild(); + TR::Node * elemsExpr = node->getChild(2); - TR::Register * orgLen = cg->gprClobberEvaluate(elemsExpr); - TR::Register * firstLen = cg->allocateRegister(); - TR::Register * secondLen = cg->allocateRegister(); - TR::RegisterPair * firstPair = cg->allocateConsecutiveRegisterPair(firstLen, firstBaseReg); - TR::RegisterPair * secondPair = cg->allocateConsecutiveRegisterPair(secondLen, secondBaseReg); - TR::Register * resultReg = cg->allocateRegister(); - TR::Instruction * cursor; + TR::Register * firstBaseReg = NULL; + TR::Register * secondBaseReg = NULL; + bool lenMinusOne=false; - TR::RegisterDependencyConditions * dependencies = cg->createDepsForRRMemoryInstructions(node, firstPair, secondPair); + // use CLCL instruction + firstBaseReg = cg->gprClobberEvaluate(firstBaseAddr); +
secondBaseReg = cg->gprClobberEvaluate(secondBaseAddr); - generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, firstLen, orgLen); - generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, secondLen, orgLen); - cursor = generateRRInstruction(cg, TR::InstOpCode::CLCL, node, firstPair, secondPair); + TR::Register * orgLen = cg->gprClobberEvaluate(elemsExpr); + TR::Register * firstLen = cg->allocateRegister(); + TR::Register * secondLen = cg->allocateRegister(); + TR::RegisterPair * firstPair = cg->allocateConsecutiveRegisterPair(firstLen, firstBaseReg); + TR::RegisterPair * secondPair = cg->allocateConsecutiveRegisterPair(secondLen, secondBaseReg); + TR::Register * resultReg = cg->allocateRegister(); + TR::Instruction * cursor; - generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, resultReg, orgLen); - cursor = generateRRInstruction(cg, TR::InstOpCode::getSubstractRegOpCode(), node, resultReg, firstLen); + TR::RegisterDependencyConditions * dependencies = cg->createDepsForRRMemoryInstructions(node, firstPair, secondPair); + generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, firstLen, orgLen); + generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, secondLen, orgLen); + cursor = generateRRInstruction(cg, TR::InstOpCode::CLCL, node, firstPair, secondPair); - cg->stopUsingRegister(firstPair); - cg->stopUsingRegister(secondPair); - cg->stopUsingRegister(firstBaseReg); - cg->stopUsingRegister(secondBaseReg); - cg->stopUsingRegister(firstLen); - cg->stopUsingRegister(secondLen); - cg->stopUsingRegister(orgLen); + generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, resultReg, orgLen); + cursor = generateRRInstruction(cg, TR::InstOpCode::getSubstractRegOpCode(), node, resultReg, firstLen); - cg->decReferenceCount(elemsExpr); - cg->decReferenceCount(firstBaseAddr); - cg->decReferenceCount(secondBaseAddr); - cursor->setDependencyConditions(dependencies); + cg->stopUsingRegister(firstPair); 
+ cg->stopUsingRegister(secondPair); + cg->stopUsingRegister(firstBaseReg); + cg->stopUsingRegister(secondBaseReg); + cg->stopUsingRegister(firstLen); + cg->stopUsingRegister(secondLen); + cg->stopUsingRegister(orgLen); - node->setRegister(resultReg); - return resultReg; + cg->decReferenceCount(elemsExpr); + cg->decReferenceCount(firstBaseAddr); + cg->decReferenceCount(secondBaseAddr); + cursor->setDependencyConditions(dependencies); - } - } + node->setRegister(resultReg); + return resultReg; + } #define TRTSIZE 256 TR::Register * @@ -15722,7 +15736,8 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, TR::LabelSymbol *compareTarget, TR::Node *ificmpNode, bool needResultReg, - bool return102) + bool return102, + bool isArrayCmpLen) { // Similar to arraycmpHelper, except it uses vector instructions and supports arraycmpsign and arraycmplen // Does not currently support aggregates or wide chars @@ -15734,11 +15749,8 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, TR::Node * secondAddrNode = return102 ? 
node->getSecondChild() : node->getFirstChild(); TR::Node * elemsExpr = node->getChild(2); bool isFoldedIf = compareTarget != NULL; - bool isArrayCmp = node->getOpCodeValue() == TR::arraycmp; TR::Compilation *comp = cg->comp(); - TR_ASSERT( !(isArrayCmp && node->isArrayCmpLen() && node->isArrayCmpSign()), "Invalid arraycmp node"); - TR::InstOpCode::S390BranchCondition ifxcmpBrCond = TR::InstOpCode::COND_NOP; if (isFoldedIf) { @@ -15759,7 +15771,7 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, // VLL uses lastByteIndexReg as the highest 0-based index to load, which is length - 1 generateRILInstruction(cg, TR::InstOpCode::getSubtractLogicalImmOpCode(), node, lastByteIndexReg, 1); - if(needResultReg && isArrayCmp && node->isArrayCmpLen()) + if(needResultReg && isArrayCmpLen) generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, resultReg, lastByteIndexReg); TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg); @@ -15799,7 +15811,7 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, } else if(needResultReg) { - if(isArrayCmp && node->isArrayCmpLen()) + if(isArrayCmpLen) generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, resultReg, 1);//Return length of the arrays, which is resultReg += 1 else generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, resultReg, resultReg);//Return zero to indicate equal @@ -15814,7 +15826,7 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, } else if(needResultReg) { - if(isArrayCmp && node->isArrayCmpLen()) + if(isArrayCmpLen) { // Return 0-based index of first non-matching element // resultReg - lastByteIndexReg = number of elements compared before the last loop diff --git a/compiler/z/codegen/OMRTreeEvaluator.hpp b/compiler/z/codegen/OMRTreeEvaluator.hpp index 537f2ce3ab3..06297753eba 100644 --- a/compiler/z/codegen/OMRTreeEvaluator.hpp +++ b/compiler/z/codegen/OMRTreeEvaluator.hpp @@ -1046,7 +1046,8 @@ class OMR_EXTENSIBLE TreeEvaluator: public 
OMR::TreeEvaluator TR::LabelSymbol *compareTarget = NULL, TR::Node *ificmpNode = NULL, bool needResultReg = true, - bool return102 = false); + bool return102 = false, + bool isArrayCmpLen = false); static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateEncodeSIMDEvaluator(TR::Node *node, TR::CodeGenerator *cg, ArrayTranslateFlavor convType); @@ -1055,6 +1056,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *long2StringEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *bitOpMemEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *BBStartEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *BBEndEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/fvtest/compilertriltest/ArrayTest.cpp b/fvtest/compilertriltest/ArrayTest.cpp index b92119fef46..d975dbc12d4 100644 --- a/fvtest/compilertriltest/ArrayTest.cpp +++ b/fvtest/compilertriltest/ArrayTest.cpp @@ -42,7 +42,7 @@ class ArraycmpEqualTest : public TRTest::JitTest, public ::testing::WithParamInt */ class ArraycmpNotEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface> {}; -TEST_P(ArraycmpEqualTest, ArraycmpLenSameArray) { +TEST_P(ArraycmpEqualTest, ArraycmpSameArray) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -50,14 +50,12 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenSameArray) { char inputTrees[1024] = {0}; /* * "address=0" parameter is needed for arraycmp opcode because "Call" property is set to the opcode. - * We need "flags=15" parameter to set arrayCmpLen flag. - * arrayCmpLen flag is defined as 0x8000, which is 1 << 15. 
*/ std::snprintf(inputTrees, sizeof(inputTrees), "(method return=Int32 args=[Address, Address]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" " (iconst %d)))))", @@ -73,10 +71,10 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenSameArray) { std::vector s1(length, 0x5c); auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(length, entry_point(&s1[0], &s1[0])); + EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s1[0])); } -TEST_P(ArraycmpEqualTest, ArraycmpLenEqualConstLen) { +TEST_P(ArraycmpEqualTest, ArraycmpEqualConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -86,7 +84,7 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenEqualConstLen) { "(method return=Int32 args=[Address, Address]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" " (iconst %d)))))", @@ -103,10 +101,10 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenEqualConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(length, entry_point(&s1[0], &s2[0])); + EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s2[0])); } -TEST_P(ArraycmpEqualTest, ArraycmpLenEqualVariableLen) { +TEST_P(ArraycmpEqualTest, ArraycmpEqualVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -116,7 +114,7 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenEqualVariableLen) { "(method return=Int32 args=[Address, Address, Int32]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" " (iload parm=2)))))" @@ -132,14 +130,17 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenEqualVariableLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); auto 
entry_point = compiler.getEntryPoint(); - EXPECT_EQ(length, entry_point(&s1[0], &s2[0], length)); + EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s2[0], length)); } -TEST_P(ArraycmpEqualTest, ArraycmpSameArray) { +INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpEqualTest, ::testing::Range(1, 128)); + +TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = GetParam(); + auto length = std::get<0>(GetParam()); + auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), "(method return=Int32 args=[Address, Address]" @@ -160,25 +161,28 @@ TEST_P(ArraycmpEqualTest, ArraycmpSameArray) { ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; std::vector s1(length, 0x5c); + std::vector s2(length, 0x5c); + s1[offset] = 0x81; + auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s1[0])); + EXPECT_EQ(returnValueForArraycmpGreaterThan, entry_point(&s1[0], &s2[0])); } -TEST_P(ArraycmpEqualTest, ArraycmpEqualConstLen) { +TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = GetParam(); + auto length = std::get<0>(GetParam()); + auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address]" + "(method return=Int32 args=[Address, Address, Int32]" " (block" " (ireturn" " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iconst %d)))))", - length + " (iload parm=2)))))" ); auto trees = parseString(inputTrees); @@ -190,24 +194,28 @@ TEST_P(ArraycmpEqualTest, ArraycmpEqualConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - auto entry_point = compiler.getEntryPoint(); - 
EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s2[0])); + s1[offset] = 0x81; + + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(returnValueForArraycmpGreaterThan, entry_point(&s1[0], &s2[0], length)); } -TEST_P(ArraycmpEqualTest, ArraycmpEqualVariableLen) { +TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = GetParam(); + auto length = std::get<0>(GetParam()); + auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address, Int32]" + "(method return=Int32 args=[Address, Address]" " (block" " (ireturn" " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iload parm=2)))))" + " (iconst %d)))))", + length ); auto trees = parseString(inputTrees); @@ -219,13 +227,13 @@ TEST_P(ArraycmpEqualTest, ArraycmpEqualVariableLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s2[0], length)); -} + s1[offset] = 0x21; -INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpEqualTest, ::testing::Range(1, 128)); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(returnValueForArraycmpLessThan, entry_point(&s1[0], &s2[0])); +} -TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualConstLen) { +TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -233,14 +241,13 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualConstLen) { auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address]" + "(method return=Int32 args=[Address, Address, Int32]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (arraycmp 
address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iconst %d)))))", - length + " (iload parm=2)))))" ); auto trees = parseString(inputTrees); @@ -252,27 +259,73 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x3f; + s1[offset] = 0x21; - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(offset, entry_point(&s1[0], &s2[0])); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(returnValueForArraycmpLessThan, entry_point(&s1[0], &s2[0], length)); +} + +static std::vector> createArraycmpNotEqualParam() { + std::vector> v; + /* Small arrays */ + for (int i = 1; i < 32; i++) { + for (int j = 0; j < i; j++) { + v.push_back(std::make_tuple(i, j)); + } + } + /* Variation of the offset of mismatched element in 128 bytes array */ + for (int i = 0; i < 128; i++) { + v.push_back(std::make_tuple(128, i)); + } + /* Medium size arrays with the mismatched element near the end of the arrays */ + for (int i = 120; i < 136; i++) { + for (int j = 96; j < i; j++) { + v.push_back(std::make_tuple(i, j)); + } + } + /* A large size array with the mismatched element near the end of the array */ + for (int i = 4000; i < 4096; i++) { + v.push_back(std::make_tuple(4096, i)); + } + return v; } +INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam())); -TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualVariableLen) { + +/** + * @brief TestFixture class for arraycmplen test + * + * @details Used for the arraycmplen test with arrays that contain the same data. + * The parameter is the length parameter for the arraycmplen evaluator. + */ +class ArraycmplenEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface {}; +/** + * @brief TestFixture class for arraycmplen test + * + * @details Used for the arraycmplen test with arrays that have a mismatched element. + * The first parameter is the length parameter for the arraycmplen evaluator.
+ * The second parameter is the offset of the mismatched element in the arrays. + */ +class ArraycmplenNotEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface> {}; + +TEST_P(ArraycmplenEqualTest, ArraycmpLenSameArray) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = std::get<0>(GetParam()); - auto offset = std::get<1>(GetParam()); + auto length = GetParam(); char inputTrees[1024] = {0}; + /* + * "address=0" parameter is needed for arraycmplen opcode because "Call" property is set to the opcode. + */ std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address, Int32]" + "(method return=Int32 args=[Address, Address]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iload parm=2)))))" + " (iconst %d)))))", + length ); auto trees = parseString(inputTrees); @@ -283,25 +336,21 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualVariableLen) { ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; std::vector s1(length, 0x5c); - std::vector s2(length, 0x5c); - s1[offset] = 0x3f; - - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(offset, entry_point(&s1[0], &s2[0], length)); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(length, entry_point(&s1[0], &s1[0])); } -TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanConstLen) { +TEST_P(ArraycmplenEqualTest, ArraycmpLenEqualConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = std::get<0>(GetParam()); - auto offset = std::get<1>(GetParam()); + auto length = GetParam(); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), "(method return=Int32 args=[Address, Address]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address]" + " (arraycmplen address=0 args=[Address, 
Address]" " (aload parm=0)" " (aload parm=1)" " (iconst %d)))))", @@ -317,24 +366,21 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x81; - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpGreaterThan, entry_point(&s1[0], &s2[0])); + EXPECT_EQ(length, entry_point(&s1[0], &s2[0])); } -TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanVariableLen) { +TEST_P(ArraycmplenEqualTest, ArraycmpLenEqualVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = std::get<0>(GetParam()); - auto offset = std::get<1>(GetParam()); + auto length = GetParam(); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), "(method return=Int32 args=[Address, Address, Int32]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address]" + " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" " (iload parm=2)))))" @@ -349,13 +395,13 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanVariableLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x81; - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpGreaterThan, entry_point(&s1[0], &s2[0], length)); + EXPECT_EQ(length, entry_point(&s1[0], &s2[0], length)); } -TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanConstLen) { +INSTANTIATE_TEST_CASE_P(ArraycmplenTest, ArraycmplenEqualTest, ::testing::Range(1L, 128L)); + +TEST_P(ArraycmplenNotEqualTest, ArraycmpLenNotEqualConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -366,7 +412,7 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanConstLen) { "(method return=Int32 args=[Address, Address]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address]" + " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" " (iconst %d)))))", @@ -382,13 +428,13 @@ 
TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x21; + s1[offset] = 0x3f; auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpLessThan, entry_point(&s1[0], &s2[0])); + EXPECT_EQ(offset, entry_point(&s1[0], &s2[0])); } -TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanVariableLen) { +TEST_P(ArraycmplenNotEqualTest, ArraycmpLenNotEqualVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -399,7 +445,7 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanVariableLen) { "(method return=Int32 args=[Address, Address, Int32]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address]" + " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" " (iload parm=2)))))" @@ -414,34 +460,10 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanVariableLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x21; + s1[offset] = 0x3f; auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpLessThan, entry_point(&s1[0], &s2[0], length)); + EXPECT_EQ(offset, entry_point(&s1[0], &s2[0], length)); } -static std::vector> createArraycmpNotEqualParam() { - std::vector> v; - /* Small arrays */ - for (int i = 1; i < 32; i++) { - for (int j = 0; j < i; j++) { - v.push_back(std::make_tuple(i, j)); - } - } - /* Variation of the offset of mismatched element in 128 bytes array */ - for (int i = 0; i < 128; i++) { - v.push_back(std::make_tuple(128, i)); - } - /* Medium size arrays with the mismatched element near the end of the arrays */ - for (int i = 120; i < 136; i++) { - for (int j = 96; j < i; j++) { - v.push_back(std::make_tuple(i, j)); - } - } - /* A large size array with the mismatched element near the end of the array */ - for (int i = 4000; i < 4096; i++) { - v.push_back(std::make_tuple(4096, i)); - } - return v; -} -INSTANTIATE_TEST_CASE_P(ArraycmpTest, 
ArraycmpNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam())); +INSTANTIATE_TEST_CASE_P(ArraycmplenTest, ArraycmplenNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam())); From 848971a41b19f07c0f298d134296974be8b68a4f Mon Sep 17 00:00:00 2001 From: Spencer-Comin Date: Mon, 14 Aug 2023 15:17:00 -0400 Subject: [PATCH 3/3] Change input length and output of arraycmplen from 4 bytes to 8 bytes Signed-off-by: Spencer Comin Co-authored-by: jimmyk --- compiler/aarch64/codegen/OMRTreeEvaluator.cpp | 31 +++-- compiler/il/OMROpcodes.enum | 6 +- compiler/p/codegen/OMRTreeEvaluator.cpp | 127 +++++++++++++++--- compiler/x/codegen/OMRTreeEvaluator.cpp | 22 ++- fvtest/compilertriltest/ArrayTest.cpp | 55 ++++---- 5 files changed, 183 insertions(+), 58 deletions(-) diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp index 0e8b3aa97b8..2f6ac95e8ca 100644 --- a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp +++ b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp @@ -6891,7 +6891,7 @@ arraycmpEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLe generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel); if (isArrayCmpLen) { - generateMovInstruction(cg, node, resultReg, lengthReg, false); + generateMovInstruction(cg, node, resultReg, lengthReg, true); } else { @@ -6900,7 +6900,7 @@ arraycmpEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLe generateCompareInstruction(cg, node, src1Reg, src2Reg, true); if (!isLengthGreaterThan15) { - auto ccmpLengthInstr = generateConditionalCompareImmInstruction(cg, node, lengthReg, 0, 4, TR::CC_NE); /* 4 for Z flag */ + auto ccmpLengthInstr = generateConditionalCompareImmInstruction(cg, node, lengthReg, 0, 4, TR::CC_NE, /* is64bit */ isArrayCmpLen); /* 4 for Z flag */ if (debugObj) { debugObj->addInstructionComment(ccmpLengthInstr, "Compares lengthReg with 0 if src1 and src2 are not the same array. 
Otherwise, sets EQ flag."); @@ -6922,14 +6922,14 @@ arraycmpEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLe TR::Register *data4Reg = srm->findOrCreateScratchRegister(); if (!isLengthGreaterThan15) { - generateCompareImmInstruction(cg, node, lengthReg, 16); + generateCompareImmInstruction(cg, node, lengthReg, 16, /* is64bit */ isArrayCmpLen); auto branchToLessThan16LabelInstr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, lessThan16Label, TR::CC_CC); if (debugObj) { debugObj->addInstructionComment(branchToLessThan16LabelInstr, "Jumps to lessThan16Label if length < 16."); } } - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subimmw, node, lengthReg, lengthReg, 16); + generateTrg1Src1ImmInstruction(cg, isArrayCmpLen ? TR::InstOpCode::subimmx : TR::InstOpCode::subimmw, node, lengthReg, lengthReg, 16); TR::LabelSymbol *loop16Label = generateLabelSymbol(cg); { @@ -6946,7 +6946,7 @@ arraycmpEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLe } generateConditionalCompareInstruction(cg, node, data3Reg, data4Reg, 0, TR::CC_EQ, true); auto branchToNotEqual16LabelInstr2 = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, notEqual16Label, TR::CC_NE); - auto subtractLengthInstr = generateTrg1Src1ImmInstruction(cg, isLengthGreaterThan15 ? TR::InstOpCode::subsimmx : TR::InstOpCode::subsimmw, node, lengthReg, lengthReg, 16); + auto subtractLengthInstr = generateTrg1Src1ImmInstruction(cg, (isLengthGreaterThan15 || isArrayCmpLen) ? TR::InstOpCode::subsimmx : TR::InstOpCode::subsimmw, node, lengthReg, lengthReg, 16); auto branchBacktoLoop16LabelInstr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, loop16Label, TR::CC_CS); if (debugObj) { @@ -6965,7 +6965,10 @@ arraycmpEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLe auto branchToDoneLabelInstr3 = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, isArrayCmpLen ? 
done0Label : doneLabel, TR::CC_EQ); auto adjustSrc1RegInstr = generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, src1Reg, src1Reg, lengthReg); generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, src2Reg, src2Reg, lengthReg); - loadConstant32(cg, node, 0, lengthReg); + if (isArrayCmpLen) + loadConstant64(cg, node, 0, lengthReg); + else + loadConstant32(cg, node, 0, lengthReg); auto branchBacktoLoop16LabelInstr = generateLabelInstruction(cg, TR::InstOpCode::b, node, loop16Label); if (debugObj) { @@ -6983,8 +6986,18 @@ arraycmpEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLe } else { - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmw, node, lengthReg, lengthReg, 16); - auto branchToDoneLabelInstr3 = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzw, node, lengthReg, isArrayCmpLen ? done0Label : doneLabel); + TR::Instruction *branchToDoneLabelInstr3; + if (isArrayCmpLen) + { + generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, lengthReg, lengthReg, 16); + branchToDoneLabelInstr3 = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, lengthReg, done0Label); + } + else + { + generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmw, node, lengthReg, lengthReg, 16); + branchToDoneLabelInstr3 = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzw, node, lengthReg, doneLabel); + } + auto branchToLessThan16Label2 = generateLabelInstruction(cg, TR::InstOpCode::b, node, lessThan16Label); if (debugObj) @@ -7041,7 +7054,7 @@ arraycmpEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLe auto branchToDone0LabelInstr = generateLabelInstruction(cg, TR::InstOpCode::b, node, done0Label); auto lessThan16LabelInstr = generateLabelInstruction(cg, TR::InstOpCode::label, node, lessThan16Label); - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subsimmw, node, lengthReg, lengthReg, 1); + generateTrg1Src1ImmInstruction(cg, isArrayCmpLen ? 
TR::InstOpCode::subsimmx : TR::InstOpCode::subsimmw, node, lengthReg, lengthReg, 1); generateTrg1MemInstruction(cg, TR::InstOpCode::ldrbpost, node, data1Reg, TR::MemoryReference::createWithDisplacement(cg, src1Reg, 1)); generateTrg1MemInstruction(cg, TR::InstOpCode::ldrbpost, node, data2Reg, TR::MemoryReference::createWithDisplacement(cg, src2Reg, 1)); generateConditionalCompareInstruction(cg, node, data1Reg, data2Reg, 0, TR::CC_HI); diff --git a/compiler/il/OMROpcodes.enum b/compiler/il/OMROpcodes.enum index 2dba4d20fa3..f3aa5d0a26b 100644 --- a/compiler/il/OMROpcodes.enum +++ b/compiler/il/OMROpcodes.enum @@ -7212,9 +7212,9 @@ OPCODE_MACRO(\ /* .properties2 = */ 0, \ /* .properties3 = */ ILProp3::LikeUse | ILProp3::LikeDef | ILProp3::SkipDynamicLitPoolOnInts, \ /* .properties4 = */ 0, \ - /* .dataType = */ TR::Int32, \ - /* .typeProperties = */ ILTypeProp::Size_4 | ILTypeProp::Integer, \ - /* .childProperties = */ THREE_CHILD(TR::Address, TR::Address, TR::Int32), \ + /* .dataType = */ TR::Int64, \ + /* .typeProperties = */ ILTypeProp::Size_8 | ILTypeProp::Integer | ILTypeProp::Unsigned, \ + /* .childProperties = */ THREE_CHILD(TR::Address, TR::Address, TR::Int64), \ /* .swapChildrenOpCode = */ TR::BadILOp, \ /* .reverseBranchOpCode = */ TR::BadILOp, \ /* .booleanCompareOpCode = */ TR::BadILOp, \ diff --git a/compiler/p/codegen/OMRTreeEvaluator.cpp b/compiler/p/codegen/OMRTreeEvaluator.cpp index ae82f19b12c..d6b285f817a 100644 --- a/compiler/p/codegen/OMRTreeEvaluator.cpp +++ b/compiler/p/codegen/OMRTreeEvaluator.cpp @@ -5447,7 +5447,7 @@ static inline void loadArrayCmpSources(TR::Node *node, TR::InstOpCode::Mnemonic } static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLen) -{ + { TR::Node *src1AddrNode = node->getChild(0); TR::Node *src2AddrNode = node->getChild(1); TR::Node *lengthNode = node->getChild(2); @@ -5458,6 +5458,7 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg, bo TR::Register 
*returnReg = cg->allocateRegister(TR_GPR); TR::Register *tempReg = cg->gprClobberEvaluate(lengthNode); TR::Register *temp2Reg = cg->allocateRegister(TR_GPR); + TR::Register *pairReg = nullptr; TR::Register *vec0Reg = cg->allocateRegister(TR_VRF); TR::Register *vec1Reg = cg->allocateRegister(TR_VRF); @@ -5469,17 +5470,32 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg, bo TR::LabelSymbol *endLabel = generateLabelSymbol(cg); TR::LabelSymbol *resultLabel = generateLabelSymbol(cg); + bool is64bit = cg->comp()->target().is64Bit(); + + if (isArrayCmpLen && !is64bit) + { + pairReg = tempReg; + tempReg = tempReg->getLowOrder(); + } + generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel); startLabel->setStartInternalControlFlow(); generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, indexReg, 0); - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::cmpi4, node, condReg, tempReg, 16); + generateTrg1Src1ImmInstruction(cg, (is64bit && isArrayCmpLen) ? 
TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpli4, node, condReg, tempReg, 16); // We don't need length anymore as we can calculate the appropriate index by using indexReg and the remainder generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, returnReg, tempReg, 0, 0xF); generateConditionalBranchInstruction(cg, TR::InstOpCode::blt, node, residueStartLabel, condReg); - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::srawi, node, tempReg, tempReg, 4); + if (is64bit && isArrayCmpLen) + { + generateShiftRightLogicalImmediateLong(cg, node, tempReg, tempReg, 4); + } + else + { + generateShiftRightLogicalImmediate(cg, node, tempReg, tempReg, 4); + } generateSrc1Instruction(cg, TR::InstOpCode::mtctr, node, tempReg); generateLabelInstruction(cg, TR::InstOpCode::label, node, loopStartLabel); @@ -5535,6 +5551,10 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg, bo generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, returnReg, tempReg, 2, 3); generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, returnReg, returnReg, tempReg); } + else if (!is64bit) + { + generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, temp2Reg, 0); + } int32_t numRegs = 9; @@ -5555,15 +5575,27 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg, bo generateDepLabelInstruction(cg, TR::InstOpCode::label, node, endLabel, dependencies); endLabel->setEndInternalControlFlow(); - node->setRegister(returnReg); + if (isArrayCmpLen && !is64bit) + { + TR::Register *lowReturnReg = returnReg; + returnReg = cg->allocateRegisterPair(returnReg, temp2Reg); + node->setRegister(returnReg); + TR::Register *liveRegs[4] = { src1AddrReg, src2AddrReg, lowReturnReg, temp2Reg }; + dependencies->stopUsingDepRegs(cg, 4, liveRegs); + cg->stopUsingRegister(pairReg); + } + else + { + node->setRegister(returnReg); + TR::Register *liveRegs[3] = { src1AddrReg, src2AddrReg, returnReg }; + dependencies->stopUsingDepRegs(cg, 3, liveRegs); + } 
cg->decReferenceCount(src1AddrNode); cg->decReferenceCount(src2AddrNode); cg->decReferenceCount(lengthNode); - TR::Register *liveRegs[3] = { src1AddrReg, src2AddrReg, returnReg }; - dependencies->stopUsingDepRegs(cg, 3, liveRegs); return returnReg; -} + } static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLen) @@ -5597,11 +5629,22 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool TR::Register *src1AddrReg = cg->gprClobberEvaluate(src1AddrNode); TR::Register *src2AddrReg = cg->gprClobberEvaluate(src2AddrNode); - byteLen = 4; - if (cg->comp()->target().is64Bit()) + bool is64bit = cg->comp()->target().is64Bit(); + + if (is64bit) + { byteLen = 8; + } + else + { + byteLen = 4; + } byteLenRegister = cg->evaluate(lengthNode); + if (isArrayCmpLen && !is64bit) + { + byteLenRegister = byteLenRegister->getLowOrder(); + } byteLenRemainingRegister = cg->allocateRegister(TR_GPR); tempReg = cg->allocateRegister(TR_GPR); @@ -5617,13 +5660,20 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool condReg2 = cg->allocateRegister(TR_CCR); mid2Label = generateLabelSymbol(cg); - generateTrg1Src1ImmInstruction(cg, (byteLen == 8) ? TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpi4, node, condReg2, byteLenRemainingRegister, byteLen); + generateTrg1Src1ImmInstruction(cg, (is64bit && isArrayCmpLen) ? TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpli4, node, condReg2, byteLenRemainingRegister, byteLen); generateConditionalBranchInstruction(cg, TR::InstOpCode::blt, node, mid2Label, condReg2); generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi2, node, src1AddrReg, src1AddrReg, -1*byteLen); generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi2, node, src2AddrReg, src2AddrReg, -1*byteLen); - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::srawi, node, tempReg, byteLenRemainingRegister, (byteLen == 8) ? 
3 : 2); + if (is64bit && isArrayCmpLen) + { + generateShiftRightLogicalImmediateLong(cg, node, tempReg, byteLenRemainingRegister, (byteLen == 8) ? 3 : 2); + } + else + { + generateShiftRightLogicalImmediate(cg, node, tempReg, byteLenRemainingRegister, (byteLen == 8) ? 3 : 2); + } generateSrc1Instruction(cg, TR::InstOpCode::mtctr, node, tempReg); loopStartLabel = generateLabelSymbol(cg); @@ -5650,7 +5700,21 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool generateTrg1MemInstruction (cg, TR::InstOpCode::ldu, node, src2Reg, TR::MemoryReference::createWithDisplacement(cg, src2AddrReg, 8, 8)); } - TR::Register *ccReg = cg->allocateRegister(TR_GPR); + TR::Register *ccReg = nullptr; + TR::Register *lowReturnReg = nullptr; + TR::Register *highReturnReg = nullptr; + + if (!is64bit && isArrayCmpLen) + { + lowReturnReg = cg->allocateRegister(TR_GPR); + highReturnReg = cg->allocateRegister(TR_GPR); + ccReg = cg->allocateRegisterPair(lowReturnReg, highReturnReg); + } + else + { + ccReg = cg->allocateRegister(TR_GPR); + } + generateTrg1Src2Instruction(cg, (byteLen == 8) ? TR::InstOpCode::cmp8 : TR::InstOpCode::cmp4, node, condReg, src1Reg, src2Reg); generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, node, residueStartLabel, condReg); @@ -5664,12 +5728,17 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool generateTrg1Instruction(cg, TR::InstOpCode::mfctr, node, byteLenRemainingRegister); - generateTrg1Src1ImmInstruction(cg, (byteLen == 8) ? TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpi4, node, condReg2, byteLenRemainingRegister, 0); + generateTrg1Src1ImmInstruction(cg, (is64bit && isArrayCmpLen) ? 
TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpli4, node, condReg2, byteLenRemainingRegister, 0); generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, byteLenRemainingRegister, byteLenRemainingRegister, tempReg); - generateShiftLeftImmediate(cg, node, byteLenRemainingRegister, byteLenRemainingRegister, (byteLen == 8) ? 3 : 2); + + if (is64bit && isArrayCmpLen) + generateShiftLeftImmediateLong(cg, node, byteLenRemainingRegister, byteLenRemainingRegister, (byteLen == 8) ? 3 : 2); + else + generateShiftLeftImmediate(cg, node, byteLenRemainingRegister, byteLenRemainingRegister, (byteLen == 8) ? 3 : 2); + generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, node, midLabel, condReg2); - generateTrg1Src2Instruction(cg, (byteLen == 8) ? TR::InstOpCode::cmp8 : TR::InstOpCode::cmp4, node, condReg2, byteLenRemainingRegister, byteLenRegister); + generateTrg1Src2Instruction(cg, (is64bit && isArrayCmpLen) ? TR::InstOpCode::cmp8 : TR::InstOpCode::cmpl4, node, condReg2, byteLenRemainingRegister, byteLenRegister); generateLabelInstruction(cg, TR::InstOpCode::label, node, midLabel); generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, byteLenRemainingRegister, byteLenRemainingRegister, byteLenRegister); generateLabelInstruction(cg, TR::InstOpCode::label, node, mid2Label); @@ -5696,10 +5765,20 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool generateLabelInstruction(cg, TR::InstOpCode::label, node, resultLabel); if (isArrayCmpLen) - generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, ccReg, byteLenRemainingRegister, byteLenRegister); + { + if (is64bit) + { + generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, ccReg, byteLenRemainingRegister, byteLenRegister); + } + else + { + generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, lowReturnReg, byteLenRemainingRegister, byteLenRegister); + generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, highReturnReg, 0); + } + } else { - 
generateTrg1Src1ImmInstruction(cg, (byteLen == 8) ? TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpi4, node, condReg2, byteLenRemainingRegister, 0); + generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::cmpli4, node, condReg2, byteLenRemainingRegister, 0); generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, node, result2Label, condReg2); generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, ccReg, 0); generateLabelInstruction(cg, TR::InstOpCode::b, node, residueEndLabel); @@ -5710,6 +5789,10 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool } int32_t numRegs = 10; + if (!is64bit && isArrayCmpLen) + { + numRegs = 11; + } TR::RegisterDependencyConditions *dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numRegs, cg->trMemory()); dependencies->addPostCondition(src1Reg, TR::RealRegister::NoReg); @@ -5719,7 +5802,15 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool dependencies->addPostCondition(byteLenRegister, TR::RealRegister::NoReg); dependencies->addPostCondition(byteLenRemainingRegister, TR::RealRegister::NoReg); dependencies->addPostCondition(tempReg, TR::RealRegister::NoReg); - dependencies->addPostCondition(ccReg, TR::RealRegister::NoReg); + if (!is64bit && isArrayCmpLen) + { + dependencies->addPostCondition(lowReturnReg, TR::RealRegister::NoReg); + dependencies->addPostCondition(highReturnReg, TR::RealRegister::NoReg); + } + else + { + dependencies->addPostCondition(ccReg, TR::RealRegister::NoReg); + } dependencies->addPostCondition(condReg, TR::RealRegister::NoReg); dependencies->addPostCondition(condReg2, TR::RealRegister::NoReg); diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index 763f70e900b..061af915132 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -1301,7 +1301,8 @@ TR::Register *OMR::X86::TreeEvaluator::SSE2ArraycmpLenEvaluator(TR::Node *node, 
TR::Register *s1Reg = cg->gprClobberEvaluate(s1AddrNode, TR::InstOpCode::MOVRegReg()); TR::Register *s2Reg = cg->gprClobberEvaluate(s2AddrNode, TR::InstOpCode::MOVRegReg()); - TR::Register *strLenReg = cg->gprClobberEvaluate(lengthNode, TR::InstOpCode::MOVRegReg()); + TR::Register *strLenReg = cg->longClobberEvaluate(lengthNode); + TR::Register *highReg = NULL; TR::Register *equalTestReg = cg->allocateRegister(TR_GPR); TR::Register *s2ByteReg = cg->allocateRegister(TR_GPR); TR::Register *byteCounterReg = cg->allocateRegister(TR_GPR); @@ -1312,6 +1313,14 @@ TR::Register *OMR::X86::TreeEvaluator::SSE2ArraycmpLenEvaluator(TR::Node *node, TR::Machine *machine = cg->machine(); + if (cg->comp()->target().is32Bit() && strLenReg->getRegisterPair()) + { + // On 32-bit, the length is guaranteed to fit into the bottom 32 bits + strLenReg = strLenReg->getLowOrder(); + // The high 32 bits will all be zero, so we can save this reg to zero-extend the final result + highReg = strLenReg->getHighOrder(); + } + generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, resultReg, 0, cg); generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg); generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, qwordCounterReg, strLenReg, cg); @@ -1378,6 +1387,17 @@ TR::Register *OMR::X86::TreeEvaluator::SSE2ArraycmpLenEvaluator(TR::Node *node, deps->addPostCondition(s1Reg, TR::RealRegister::NoReg, cg); generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg); + + if (cg->comp()->target().is32Bit()) + { + if (highReg == NULL) + { + highReg = cg->allocateRegister(TR_GPR); + generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, highReg, 0, cg); + } + resultReg = cg->allocateRegisterPair(resultReg, highReg); + } + node->setRegister(resultReg); cg->decReferenceCount(s1AddrNode); diff --git a/fvtest/compilertriltest/ArrayTest.cpp b/fvtest/compilertriltest/ArrayTest.cpp index d975dbc12d4..274f82647f1 100644 --- a/fvtest/compilertriltest/ArrayTest.cpp +++ 
b/fvtest/compilertriltest/ArrayTest.cpp @@ -265,8 +265,9 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanVariableLen) { EXPECT_EQ(returnValueForArraycmpLessThan, entry_point(&s1[0], &s2[0], length)); } -static std::vector> createArraycmpNotEqualParam() { - std::vector> v; +template +static std::vector> createArraycmpNotEqualParam() { + std::vector> v; /* Small arrays */ for (int i = 1; i < 32; i++) { for (int j = 0; j < i; j++) { @@ -289,7 +290,7 @@ static std::vector> createArraycmpNotEqualParam() { } return v; } -INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam())); +INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam())); /** @@ -298,7 +299,7 @@ INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpNotEqualTest, ::testing::ValuesIn( * @details Used for arraycmplen test with the arrays with same data. * The parameter is the length parameter for the arraycmp evaluator. */ -class ArraycmplenEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface {}; +class ArraycmplenEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface {}; /** * @brief TestFixture class for arraycmplen test * @@ -306,7 +307,7 @@ class ArraycmplenEqualTest : public TRTest::JitTest, public ::testing::WithParam * The first parameter is the length parameter for the arraycmp evaluator. * The second parameter is the offset of the mismatched element in the arrays. */ -class ArraycmplenNotEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface> {}; +class ArraycmplenNotEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface> {}; TEST_P(ArraycmplenEqualTest, ArraycmpLenSameArray) { SKIP_ON_ARM(MissingImplementation); @@ -318,13 +319,13 @@ TEST_P(ArraycmplenEqualTest, ArraycmpLenSameArray) { * "address=0" parameter is needed for arraycmp opcode because "Call" property is set to the opcode. 
*/ std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address]" + "(method return=Int64 args=[Address, Address]" " (block" - " (ireturn" + " (lreturn" " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iconst %d)))))", + " (lconst %" OMR_PRId64 ")))))", length ); auto trees = parseString(inputTrees); @@ -336,7 +337,7 @@ TEST_P(ArraycmplenEqualTest, ArraycmpLenSameArray) { ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; std::vector s1(length, 0x5c); - auto entry_point = compiler.getEntryPoint(); + auto entry_point = compiler.getEntryPoint(); EXPECT_EQ(length, entry_point(&s1[0], &s1[0])); } @@ -347,13 +348,13 @@ TEST_P(ArraycmplenEqualTest, ArraycmpLenEqualConstLen) { auto length = GetParam(); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address]" + "(method return=Int64 args=[Address, Address]" " (block" - " (ireturn" + " (lreturn" " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iconst %d)))))", + " (lconst %" OMR_PRId64 ")))))", length ); auto trees = parseString(inputTrees); @@ -366,7 +367,7 @@ TEST_P(ArraycmplenEqualTest, ArraycmpLenEqualConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - auto entry_point = compiler.getEntryPoint(); + auto entry_point = compiler.getEntryPoint(); EXPECT_EQ(length, entry_point(&s1[0], &s2[0])); } @@ -377,13 +378,13 @@ TEST_P(ArraycmplenEqualTest, ArraycmpLenEqualVariableLen) { auto length = GetParam(); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address, Int32]" + "(method return=Int64 args=[Address, Address, Int64]" " (block" - " (ireturn" + " (lreturn" " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iload parm=2)))))" + " (lload parm=2)))))" ); auto trees = 
parseString(inputTrees); @@ -395,11 +396,11 @@ TEST_P(ArraycmplenEqualTest, ArraycmpLenEqualVariableLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - auto entry_point = compiler.getEntryPoint(); + auto entry_point = compiler.getEntryPoint(); EXPECT_EQ(length, entry_point(&s1[0], &s2[0], length)); } -INSTANTIATE_TEST_CASE_P(ArraycmplenTest, ArraycmplenEqualTest, ::testing::Range(1L, 128L)); +INSTANTIATE_TEST_CASE_P(ArraycmplenTest, ArraycmplenEqualTest, ::testing::Range(static_cast(1), static_cast(128))); TEST_P(ArraycmplenNotEqualTest, ArraycmpLenNotEqualConstLen) { SKIP_ON_ARM(MissingImplementation); @@ -409,13 +410,13 @@ TEST_P(ArraycmplenNotEqualTest, ArraycmpLenNotEqualConstLen) { auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address]" + "(method return=Int64 args=[Address, Address]" " (block" - " (ireturn" + " (lreturn" " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iconst %d)))))", + " (lconst %" OMR_PRId64 ")))))", length ); auto trees = parseString(inputTrees); @@ -430,7 +431,7 @@ TEST_P(ArraycmplenNotEqualTest, ArraycmpLenNotEqualConstLen) { std::vector s2(length, 0x5c); s1[offset] = 0x3f; - auto entry_point = compiler.getEntryPoint(); + auto entry_point = compiler.getEntryPoint(); EXPECT_EQ(offset, entry_point(&s1[0], &s2[0])); } @@ -442,13 +443,13 @@ TEST_P(ArraycmplenNotEqualTest, ArraycmpLenNotEqualVariableLen) { auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address, Int32]" + "(method return=Int64 args=[Address, Address, Int64]" " (block" - " (ireturn" + " (lreturn" " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iload parm=2)))))" + " (lload parm=2)))))" ); auto trees = parseString(inputTrees); @@ -462,8 +463,8 @@ 
TEST_P(ArraycmplenNotEqualTest, ArraycmpLenNotEqualVariableLen) { std::vector s2(length, 0x5c); s1[offset] = 0x3f; - auto entry_point = compiler.getEntryPoint(); + auto entry_point = compiler.getEntryPoint(); EXPECT_EQ(offset, entry_point(&s1[0], &s2[0], length)); } -INSTANTIATE_TEST_CASE_P(ArraycmplenTest, ArraycmplenNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam())); +INSTANTIATE_TEST_CASE_P(ArraycmplenTest, ArraycmplenNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam()));