diff --git a/compiler/aarch64/codegen/OMRCodeGenerator.cpp b/compiler/aarch64/codegen/OMRCodeGenerator.cpp index 9339257fc37..6b2e74a80d7 100644 --- a/compiler/aarch64/codegen/OMRCodeGenerator.cpp +++ b/compiler/aarch64/codegen/OMRCodeGenerator.cpp @@ -184,6 +184,11 @@ OMR::ARM64::CodeGenerator::initialize() { cg->setSupportsArrayCmp(); } + static const bool disableArrayCmpLen = feGetEnv("TR_aarch64DisableArrayCmpLen") != NULL; + if (!disableArrayCmpLen) + { + cg->setSupportsArrayCmpLen(); + } } if (!comp->getOption(TR_DisableArraySetOpts)) { diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp index 4e72b974a9d..9e9578efb64 100644 --- a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp +++ b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp @@ -6796,8 +6796,8 @@ OMR::ARM64::TreeEvaluator::arraysetEvaluator(TR::Node *node, TR::CodeGenerator * return NULL; } -TR::Register * -OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) +static TR::Register * +arraycmpEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLen) { /* * Generating following instruction sequence @@ -6903,7 +6903,6 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * TR::Node *src2Node = node->getSecondChild(); TR::Node *lengthNode = node->getThirdChild(); bool isLengthGreaterThan15 = lengthNode->getOpCode().isLoadConst() && lengthNode->getConstValue() > 15; - const bool isArrayCmpLen = node->isArrayCmpLen(); TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager(12); TR::Register *savedSrc1Reg = cg->evaluate(src1Node); @@ -6930,7 +6929,7 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel); if (isArrayCmpLen) { - generateMovInstruction(cg, node, resultReg, lengthReg, false); + generateMovInstruction(cg, node, resultReg, lengthReg, true); } else { @@ -6939,7 
+6938,7 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * generateCompareInstruction(cg, node, src1Reg, src2Reg, true); if (!isLengthGreaterThan15) { - auto ccmpLengthInstr = generateConditionalCompareImmInstruction(cg, node, lengthReg, 0, 4, TR::CC_NE); /* 4 for Z flag */ + auto ccmpLengthInstr = generateConditionalCompareImmInstruction(cg, node, lengthReg, 0, 4, TR::CC_NE, /* is64bit */ isArrayCmpLen); /* 4 for Z flag */ if (debugObj) { debugObj->addInstructionComment(ccmpLengthInstr, "Compares lengthReg with 0 if src1 and src2 are not the same array. Otherwise, sets EQ flag."); @@ -6961,14 +6960,14 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * TR::Register *data4Reg = srm->findOrCreateScratchRegister(); if (!isLengthGreaterThan15) { - generateCompareImmInstruction(cg, node, lengthReg, 16); + generateCompareImmInstruction(cg, node, lengthReg, 16, /* is64bit */ isArrayCmpLen); auto branchToLessThan16LabelInstr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, lessThan16Label, TR::CC_CC); if (debugObj) { debugObj->addInstructionComment(branchToLessThan16LabelInstr, "Jumps to lessThan16Label if length < 16."); } } - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subimmw, node, lengthReg, lengthReg, 16); + generateTrg1Src1ImmInstruction(cg, isArrayCmpLen ? TR::InstOpCode::subimmx : TR::InstOpCode::subimmw, node, lengthReg, lengthReg, 16); TR::LabelSymbol *loop16Label = generateLabelSymbol(cg); { @@ -6985,7 +6984,7 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * } generateConditionalCompareInstruction(cg, node, data3Reg, data4Reg, 0, TR::CC_EQ, true); auto branchToNotEqual16LabelInstr2 = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, notEqual16Label, TR::CC_NE); - auto subtractLengthInstr = generateTrg1Src1ImmInstruction(cg, isLengthGreaterThan15 ? 
TR::InstOpCode::subsimmx : TR::InstOpCode::subsimmw, node, lengthReg, lengthReg, 16); + auto subtractLengthInstr = generateTrg1Src1ImmInstruction(cg, (isLengthGreaterThan15 || isArrayCmpLen) ? TR::InstOpCode::subsimmx : TR::InstOpCode::subsimmw, node, lengthReg, lengthReg, 16); auto branchBacktoLoop16LabelInstr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, loop16Label, TR::CC_CS); if (debugObj) { @@ -7004,7 +7003,10 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * auto branchToDoneLabelInstr3 = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, isArrayCmpLen ? done0Label : doneLabel, TR::CC_EQ); auto adjustSrc1RegInstr = generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, src1Reg, src1Reg, lengthReg); generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, src2Reg, src2Reg, lengthReg); - loadConstant32(cg, node, 0, lengthReg); + if (isArrayCmpLen) + loadConstant64(cg, node, 0, lengthReg); + else + loadConstant32(cg, node, 0, lengthReg); auto branchBacktoLoop16LabelInstr = generateLabelInstruction(cg, TR::InstOpCode::b, node, loop16Label); if (debugObj) { @@ -7022,8 +7024,18 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * } else { - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmw, node, lengthReg, lengthReg, 16); - auto branchToDoneLabelInstr3 = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzw, node, lengthReg, isArrayCmpLen ? 
done0Label : doneLabel); + TR::Instruction *branchToDoneLabelInstr3; + if (isArrayCmpLen) + { + generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, lengthReg, lengthReg, 16); + branchToDoneLabelInstr3 = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, lengthReg, done0Label); + } + else + { + generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmw, node, lengthReg, lengthReg, 16); + branchToDoneLabelInstr3 = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzw, node, lengthReg, doneLabel); + } + auto branchToLessThan16Label2 = generateLabelInstruction(cg, TR::InstOpCode::b, node, lessThan16Label); if (debugObj) @@ -7080,7 +7092,7 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * auto branchToDone0LabelInstr = generateLabelInstruction(cg, TR::InstOpCode::b, node, done0Label); auto lessThan16LabelInstr = generateLabelInstruction(cg, TR::InstOpCode::label, node, lessThan16Label); - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subsimmw, node, lengthReg, lengthReg, 1); + generateTrg1Src1ImmInstruction(cg, isArrayCmpLen ? 
TR::InstOpCode::subsimmx : TR::InstOpCode::subsimmw, node, lengthReg, lengthReg, 1); generateTrg1MemInstruction(cg, TR::InstOpCode::ldrbpost, node, data1Reg, TR::MemoryReference::createWithDisplacement(cg, src1Reg, 1)); generateTrg1MemInstruction(cg, TR::InstOpCode::ldrbpost, node, data2Reg, TR::MemoryReference::createWithDisplacement(cg, src2Reg, 1)); generateConditionalCompareInstruction(cg, node, data1Reg, data2Reg, 0, TR::CC_HI); @@ -7151,6 +7163,18 @@ OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator * return resultReg; } +TR::Register * +OMR::ARM64::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + return arraycmpEvaluatorHelper(node, cg, false); + } + +TR::Register * +OMR::ARM64::TreeEvaluator::arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + return arraycmpEvaluatorHelper(node, cg, true); + } + static void inlineConstantLengthForwardArrayCopy(TR::Node *node, int64_t byteLen, TR::Register *srcReg, TR::Register *dstReg, TR::CodeGenerator *cg) { diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.hpp b/compiler/aarch64/codegen/OMRTreeEvaluator.hpp index a46c886faa8..58056973574 100644 --- a/compiler/aarch64/codegen/OMRTreeEvaluator.hpp +++ b/compiler/aarch64/codegen/OMRTreeEvaluator.hpp @@ -833,6 +833,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateAndTestEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *computeCCEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *butestEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *sutestEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/arm/codegen/OMRTreeEvaluator.cpp 
b/compiler/arm/codegen/OMRTreeEvaluator.cpp index 422abdbf083..f62a10c1cc4 100644 --- a/compiler/arm/codegen/OMRTreeEvaluator.cpp +++ b/compiler/arm/codegen/OMRTreeEvaluator.cpp @@ -3626,6 +3626,12 @@ TR::Register *OMR::ARM::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::Cod return NULL; } +TR::Register *OMR::ARM::TreeEvaluator::arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + TR_UNIMPLEMENTED(); + return NULL; + } + bool OMR::ARM::TreeEvaluator::stopUsingCopyReg( TR::Node* node, TR::Register*& reg, diff --git a/compiler/arm/codegen/OMRTreeEvaluator.hpp b/compiler/arm/codegen/OMRTreeEvaluator.hpp index 49cb9391183..25c59cce12b 100644 --- a/compiler/arm/codegen/OMRTreeEvaluator.hpp +++ b/compiler/arm/codegen/OMRTreeEvaluator.hpp @@ -762,6 +762,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *arraytranslateAndTestEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *BBStartEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *BBEndEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *commonLoadEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic memoryToRegisterOp, int32_t memSize, TR::CodeGenerator *cg); diff --git a/compiler/codegen/OMRCodeGenerator.hpp b/compiler/codegen/OMRCodeGenerator.hpp index e163f9a0a93..abd116e567a 100644 --- a/compiler/codegen/OMRCodeGenerator.hpp +++ b/compiler/codegen/OMRCodeGenerator.hpp @@ -1631,6 +1631,9 @@ class OMR_EXTENSIBLE CodeGenerator bool getSupportsArrayCmp() {return _flags1.testAny(SupportsArrayCmp);} void setSupportsArrayCmp() {_flags1.set(SupportsArrayCmp);} + bool getSupportsArrayCmpLen() {return _flags1.testAny(SupportsArrayCmpLen);} + void setSupportsArrayCmpLen() 
{_flags1.set(SupportsArrayCmpLen);} + bool getSupportsArrayCmpSign() {return _flags3.testAny(SupportsArrayCmpSign);} void setSupportsArrayCmpSign() {_flags3.set(SupportsArrayCmpSign);} @@ -1832,7 +1835,7 @@ class OMR_EXTENSIBLE CodeGenerator // AVAILABLE = 0x02000000, UsesRegisterPairsForLongs = 0x04000000, SupportsArraySet = 0x08000000, - // AVAILABLE = 0x10000000, + SupportsArrayCmpLen = 0x10000000, SupportsArrayCmp = 0x20000000, DisableLongGRA = 0x40000000, DummyLastEnum1 diff --git a/compiler/compile/OMRNonHelperSymbols.enum b/compiler/compile/OMRNonHelperSymbols.enum index efacf119412..e71321980d7 100644 --- a/compiler/compile/OMRNonHelperSymbols.enum +++ b/compiler/compile/OMRNonHelperSymbols.enum @@ -27,6 +27,7 @@ arraySetSymbol, arrayCopySymbol, arrayCmpSymbol, + arrayCmpLenSymbol, prefetchSymbol, killsAllMethodSymbol, // A dummy method whose alias set includes all diff --git a/compiler/compile/OMRSymbolReferenceTable.cpp b/compiler/compile/OMRSymbolReferenceTable.cpp index 7f302041fbe..2d46ecda690 100644 --- a/compiler/compile/OMRSymbolReferenceTable.cpp +++ b/compiler/compile/OMRSymbolReferenceTable.cpp @@ -663,6 +663,19 @@ OMR::SymbolReferenceTable::findOrCreateArrayCmpSymbol() return element(arrayCmpSymbol); } +TR::SymbolReference * +OMR::SymbolReferenceTable::findOrCreateArrayCmpLenSymbol() + { + if (!element(arrayCmpLenSymbol)) + { + TR::MethodSymbol * sym = TR::MethodSymbol::create(trHeapMemory(),TR_Helper); + sym->setHelper(); + + element(arrayCmpLenSymbol) = new (trHeapMemory()) TR::SymbolReference(self(), arrayCmpLenSymbol, sym); + } + return element(arrayCmpLenSymbol); + } + TR::SymbolReference * OMR::SymbolReferenceTable::findOrCreateCurrentTimeMaxPrecisionSymbol() { diff --git a/compiler/compile/OMRSymbolReferenceTable.hpp b/compiler/compile/OMRSymbolReferenceTable.hpp index f60641f29c7..46e62ce8d72 100644 --- a/compiler/compile/OMRSymbolReferenceTable.hpp +++ b/compiler/compile/OMRSymbolReferenceTable.hpp @@ -88,6 +88,7 @@ class 
SymbolReferenceTable arraySetSymbol, arrayCopySymbol, arrayCmpSymbol, + arrayCmpLenSymbol, prefetchSymbol, killsAllMethodSymbol, // A dummy method whose alias set includes all @@ -844,6 +845,7 @@ class SymbolReferenceTable TR::SymbolReference * findOrCreateArrayCopySymbol(); TR::SymbolReference * findOrCreateArraySetSymbol(); TR::SymbolReference * findOrCreateArrayCmpSymbol(); + TR::SymbolReference * findOrCreateArrayCmpLenSymbol(); TR::SymbolReference * findOrCreateClassSymbol(TR::ResolvedMethodSymbol * owningMethodSymbol, int32_t cpIndex, void * classObject, bool cpIndexOfStatic = false); TR::SymbolReference * findOrCreateArrayShadowSymbolRef(TR::DataType type, TR::Node * baseArrayAddress, int32_t size, TR_FrontEnd * fe); diff --git a/compiler/il/Aliases.cpp b/compiler/il/Aliases.cpp index 61ccd646994..d9add540866 100644 --- a/compiler/il/Aliases.cpp +++ b/compiler/il/Aliases.cpp @@ -351,7 +351,8 @@ OMR::SymbolReference::getUseDefAliasesBV(bool isDirectCall, bool includeGCSafePo return &symRefTab->aliasBuilder.defaultMethodDefAliases(); } - if (symRefTab->isNonHelper(self(), TR::SymbolReferenceTable::arrayCmpSymbol)) + if (symRefTab->isNonHelper(self(), TR::SymbolReferenceTable::arrayCmpSymbol) || + symRefTab->isNonHelper(self(), TR::SymbolReferenceTable::arrayCmpLenSymbol)) return 0; switch (self()->getReferenceNumber()) diff --git a/compiler/il/OMRILOps.hpp b/compiler/il/OMRILOps.hpp index ad2806d49c1..3110bcd83e7 100644 --- a/compiler/il/OMRILOps.hpp +++ b/compiler/il/OMRILOps.hpp @@ -583,11 +583,12 @@ class ILOpCode bool isFunctionCall() const { - return isCall() && - getOpCodeValue() != TR::arraycopy && - getOpCodeValue() != TR::arrayset && - getOpCodeValue() != TR::bitOpMem && - getOpCodeValue() != TR::arraycmp; + return isCall() && + getOpCodeValue() != TR::arraycopy && + getOpCodeValue() != TR::arrayset && + getOpCodeValue() != TR::bitOpMem && + getOpCodeValue() != TR::arraycmp && + getOpCodeValue() != TR::arraycmplen; } bool isCompareDouble() @@ -662,10 
+663,11 @@ class ILOpCode bool isMemToMemOp() { - return getOpCodeValue() == TR::bitOpMem || - getOpCodeValue() == TR::arrayset || - getOpCodeValue() == TR::arraycmp || - getOpCodeValue() == TR::arraycopy; + return getOpCodeValue() == TR::bitOpMem || + getOpCodeValue() == TR::arrayset || + getOpCodeValue() == TR::arraycmp || + getOpCodeValue() == TR::arraycopy || + getOpCodeValue() == TR::arraycmplen; } static TR::ILOpCodes getDataTypeConversion(TR::DataType t1, TR::DataType t2); diff --git a/compiler/il/OMRNode.cpp b/compiler/il/OMRNode.cpp index 3e20992fb57..d809db4af32 100644 --- a/compiler/il/OMRNode.cpp +++ b/compiler/il/OMRNode.cpp @@ -3649,6 +3649,7 @@ OMR::Node::exceptionsRaised() possibleExceptions |= TR::Block:: CanCatchBoundCheck; break; case TR::arraycmp: // does not throw any exceptions + case TR::arraycmplen: break; case TR::checkcast: possibleExceptions |= TR::Block:: CanCatchCheckCast; @@ -6111,28 +6112,6 @@ OMR::Node::chkTableBackedByRawStorage() return self()->getOpCodeValue() == TR::arraytranslate && _flags.testAny(tableBackedByRawStorage); } -bool -OMR::Node::isArrayCmpLen() - { - TR_ASSERT(self()->getOpCodeValue() == TR::arraycmp, "Opcode must be arraycmp"); - return _flags.testAny(arrayCmpLen); - } - -void -OMR::Node::setArrayCmpLen(bool v) - { - TR::Compilation *c = TR::comp(); - TR_ASSERT(self()->getOpCodeValue() == TR::arraycmp, "Opcode must be arraycmp"); - if (performNodeTransformation2(c, "O^O NODE FLAGS: Setting arrayCmpLen flag on node %p to %d\n", self(), v)) - _flags.set(arrayCmpLen, v); - } - -bool -OMR::Node::chkArrayCmpLen() - { - return self()->getOpCodeValue() == TR::arraycmp && _flags.testAny(arrayCmpLen); - } - bool OMR::Node::isArrayCmpSign() { diff --git a/compiler/il/OMRNode.hpp b/compiler/il/OMRNode.hpp index 5da70fbf591..bdc421b74cf 100644 --- a/compiler/il/OMRNode.hpp +++ b/compiler/il/OMRNode.hpp @@ -1191,10 +1191,6 @@ class OMR_EXTENSIBLE Node bool chkTableBackedByRawStorage(); // Flags used by TR::arraycmp - bool 
isArrayCmpLen(); - void setArrayCmpLen(bool v); - bool chkArrayCmpLen(); - bool isArrayCmpSign(); void setArrayCmpSign(bool v); bool chkArrayCmpSign(); @@ -1851,7 +1847,6 @@ class OMR_EXTENSIBLE Node tableBackedByRawStorage = 0x00008000, // Flags used by TR::arraycmp - arrayCmpLen = 0x00008000, arrayCmpSign = 0x00004000, // Flags used by TR::arraycopy diff --git a/compiler/il/OMROpcodes.enum b/compiler/il/OMROpcodes.enum index dd714fb9a4a..f3aa5d0a26b 100644 --- a/compiler/il/OMROpcodes.enum +++ b/compiler/il/OMROpcodes.enum @@ -7201,7 +7201,25 @@ OPCODE_MACRO(\ /* .reverseBranchOpCode = */ TR::BadILOp, \ /* .booleanCompareOpCode = */ TR::BadILOp, \ /* .ifCompareOpCode = */ TR::BadILOp, \ - /* .description = Inline code for memory comparison of part of an array */ \ + /* .description = Compare two blocks of memory and return a lexical ordering constant. */ \ + /* The constant indicates whether the first child is less than, equal to, or greater than the second child. */ \ + /* When the arrayCmpSign flag is set, the lesser/equal/greater constants are -1/0/1, otherwise the constants are 1/0/2 */ \ +) +OPCODE_MACRO(\ + /* .opcode = */ arraycmplen, \ + /* .name = */ "arraycmplen", \ + /* .properties1 = */ ILProp1::Call | ILProp1::HasSymbolRef, \ + /* .properties2 = */ 0, \ + /* .properties3 = */ ILProp3::LikeUse | ILProp3::LikeDef | ILProp3::SkipDynamicLitPoolOnInts, \ + /* .properties4 = */ 0, \ + /* .dataType = */ TR::Int64, \ + /* .typeProperties = */ ILTypeProp::Size_8 | ILTypeProp::Integer | ILTypeProp::Unsigned, \ + /* .childProperties = */ THREE_CHILD(TR::Address, TR::Address, TR::Int64), \ + /* .swapChildrenOpCode = */ TR::BadILOp, \ + /* .reverseBranchOpCode = */ TR::BadILOp, \ + /* .booleanCompareOpCode = */ TR::BadILOp, \ + /* .ifCompareOpCode = */ TR::BadILOp, \ + /* .description = Compare two blocks of memory and return the index of the first mismatched byte */ \ ) OPCODE_MACRO(\ /* .opcode = */ allocationFence, \ diff --git
a/compiler/optimizer/DeadTreesElimination.cpp b/compiler/optimizer/DeadTreesElimination.cpp index 125a1710b97..c6c371cd229 100644 --- a/compiler/optimizer/DeadTreesElimination.cpp +++ b/compiler/optimizer/DeadTreesElimination.cpp @@ -672,7 +672,6 @@ bool TR::DeadTreesElimination::fixUpTree(TR::Node *node, TR::TreeTop *treeTop, T // for arraycmp node, don't create its tree top anchor // fold it into if statment and save jump instruction if (node->getOpCodeValue() == TR::arraycmp && - !node->isArrayCmpLen() && comp()->target().cpu.isX86()) { anchorArrayCmp = false; diff --git a/compiler/optimizer/IsolatedStoreElimination.cpp b/compiler/optimizer/IsolatedStoreElimination.cpp index 2fd4735bba1..3b55dd8aad7 100644 --- a/compiler/optimizer/IsolatedStoreElimination.cpp +++ b/compiler/optimizer/IsolatedStoreElimination.cpp @@ -1628,6 +1628,7 @@ nodeHasSideEffect(TR::Node *node) { case TR::arrayset: case TR::arraycmp: + case TR::arraycmplen: case TR::arraytranslate: case TR::arraytranslateAndTest: case TR::long2String: diff --git a/compiler/optimizer/LoopReducer.cpp b/compiler/optimizer/LoopReducer.cpp index 17791357cbf..aca1f3b1fc7 100644 --- a/compiler/optimizer/LoopReducer.cpp +++ b/compiler/optimizer/LoopReducer.cpp @@ -4429,6 +4429,7 @@ TR_LoopReducer::perform() !comp()->cg()->getSupportsReferenceArrayCopy() && !comp()->cg()->getSupportsPrimitiveArrayCopy() && !comp()->cg()->getSupportsArrayCmp() && + !comp()->cg()->getSupportsArrayCmpLen() && !comp()->cg()->getSupportsArrayTranslateTRxx() && !comp()->cg()->getSupportsArrayTranslateAndTest()) { diff --git a/compiler/optimizer/OMRSimplifierHandlers.cpp b/compiler/optimizer/OMRSimplifierHandlers.cpp index 6f8740434b6..e33f2fd51b7 100644 --- a/compiler/optimizer/OMRSimplifierHandlers.cpp +++ b/compiler/optimizer/OMRSimplifierHandlers.cpp @@ -12464,7 +12464,7 @@ TR::Node *bu2iSimplifier(TR::Node * node, TR::Block * block, TR::Simplifier * s) } else if (firstChild->getOpCodeValue() == TR::i2b && 
(firstChild->getFirstChild()->getOpCodeValue() == TR::butest || - firstChild->getFirstChild()->getOpCodeValue() == TR::arraycmp || + (firstChild->getFirstChild()->getOpCodeValue() == TR::arraycmp && !firstChild->getFirstChild()->isArrayCmpSign()) || firstChild->getFirstChild()->getOpCodeValue() == TR::icmpeq || firstChild->getFirstChild()->getOpCodeValue() == TR::lcmpeq || firstChild->getFirstChild()->getOpCodeValue() == TR::icmpne || diff --git a/compiler/optimizer/OMRSimplifierTable.enum b/compiler/optimizer/OMRSimplifierTable.enum index 94751d7c043..9cce407c88c 100644 --- a/compiler/optimizer/OMRSimplifierTable.enum +++ b/compiler/optimizer/OMRSimplifierTable.enum @@ -611,6 +611,7 @@ #define long2StringSimplifierHandler dftSimplifier #define bitOpMemSimplifierHandler bitOpMemSimplifier #define arraycmpSimplifierHandler dftSimplifier +#define arraycmplenSimplifierHandler dftSimplifier #define allocationFenceSimplifierHandler dftSimplifier #define loadFenceSimplifierHandler dftSimplifier #define storeFenceSimplifierHandler dftSimplifier diff --git a/compiler/optimizer/SinkStores.cpp b/compiler/optimizer/SinkStores.cpp index 5f3099ff09f..95674bcca1f 100644 --- a/compiler/optimizer/SinkStores.cpp +++ b/compiler/optimizer/SinkStores.cpp @@ -1823,6 +1823,8 @@ bool TR_SinkStores::treeIsSinkableStore(TR::Node *node, bool sinkIndirectLoads, { if (node->getOpCodeValue() == TR::arraycmp) traceMsg(comp()," *arraycmp is a call %d, raises exceptions %d*\n",node->getOpCode().isCall(),node->exceptionsRaised()); + else if (node->getOpCodeValue() == TR::arraycmplen) + traceMsg(comp()," *arraycmplen is a call %d, raises exceptions %d*\n",node->getOpCode().isCall(),node->exceptionsRaised()); else if (node->getOpCodeValue() == TR::arraycopy) traceMsg(comp()," *arraycopy is a call %d, raises exceptions %d*\n",node->getOpCode().isCall(),node->exceptionsRaised()); traceMsg(comp(), " *store is a call or an excepting node*\n"); diff --git a/compiler/optimizer/ValuePropagationTable.hpp 
b/compiler/optimizer/ValuePropagationTable.hpp index 81ea86321f3..54d6ec16c4c 100644 --- a/compiler/optimizer/ValuePropagationTable.hpp +++ b/compiler/optimizer/ValuePropagationTable.hpp @@ -749,6 +749,7 @@ TR::Node * constrainLongBitCount(OMR::ValuePropagation *vp, TR::Node *node); #define long2StringVPHandler constrainChildren #define bitOpMemVPHandler constrainChildren #define arraycmpVPHandler constrainChildren +#define arraycmplenVPHandler constrainChildren #define allocationFenceVPHandler constrainChildren #define loadFenceVPHandler constrainChildren #define storeFenceVPHandler constrainChildren diff --git a/compiler/p/codegen/OMRCodeGenerator.cpp b/compiler/p/codegen/OMRCodeGenerator.cpp index 3d70ea5cb82..d5150d79362 100644 --- a/compiler/p/codegen/OMRCodeGenerator.cpp +++ b/compiler/p/codegen/OMRCodeGenerator.cpp @@ -226,6 +226,7 @@ OMR::Power::CodeGenerator::initialize() cg->setSupportsArraySet(); } cg->setSupportsArrayCmp(); + cg->setSupportsArrayCmpLen(); if (comp->target().cpu.supportsFeature(OMR_FEATURE_PPC_HAS_VSX)) { diff --git a/compiler/p/codegen/OMRTreeEvaluator.cpp b/compiler/p/codegen/OMRTreeEvaluator.cpp index da37a2d012f..d6b285f817a 100644 --- a/compiler/p/codegen/OMRTreeEvaluator.cpp +++ b/compiler/p/codegen/OMRTreeEvaluator.cpp @@ -5446,8 +5446,8 @@ static inline void loadArrayCmpSources(TR::Node *node, TR::InstOpCode::Mnemonic } } -static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) -{ +static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLen) + { TR::Node *src1AddrNode = node->getChild(0); TR::Node *src2AddrNode = node->getChild(1); TR::Node *lengthNode = node->getChild(2); @@ -5458,6 +5458,7 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) TR::Register *returnReg = cg->allocateRegister(TR_GPR); TR::Register *tempReg = cg->gprClobberEvaluate(lengthNode); TR::Register *temp2Reg = cg->allocateRegister(TR_GPR); + TR::Register *pairReg = 
nullptr; TR::Register *vec0Reg = cg->allocateRegister(TR_VRF); TR::Register *vec1Reg = cg->allocateRegister(TR_VRF); @@ -5469,17 +5470,32 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) TR::LabelSymbol *endLabel = generateLabelSymbol(cg); TR::LabelSymbol *resultLabel = generateLabelSymbol(cg); + bool is64bit = cg->comp()->target().is64Bit(); + + if (isArrayCmpLen && !is64bit) + { + pairReg = tempReg; + tempReg = tempReg->getLowOrder(); + } + generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel); startLabel->setStartInternalControlFlow(); generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, indexReg, 0); - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::cmpi4, node, condReg, tempReg, 16); + generateTrg1Src1ImmInstruction(cg, (is64bit && isArrayCmpLen) ? TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpli4, node, condReg, tempReg, 16); // We don't need length anymore as we can calculate the appropriate index by using indexReg and the remainder generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, returnReg, tempReg, 0, 0xF); generateConditionalBranchInstruction(cg, TR::InstOpCode::blt, node, residueStartLabel, condReg); - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::srawi, node, tempReg, tempReg, 4); + if (is64bit && isArrayCmpLen) + { + generateShiftRightLogicalImmediateLong(cg, node, tempReg, tempReg, 4); + } + else + { + generateShiftRightLogicalImmediate(cg, node, tempReg, tempReg, 4); + } generateSrc1Instruction(cg, TR::InstOpCode::mtctr, node, tempReg); generateLabelInstruction(cg, TR::InstOpCode::label, node, loopStartLabel); @@ -5511,7 +5527,7 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) generateTrg1Src1Instruction(cg, TR::InstOpCode::vclzlsbb, node, tempReg, vec0Reg); - if (!node->isArrayCmpLen()) + if (!isArrayCmpLen) { generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, node, returnReg, returnReg, -1); } @@ -5523,7 +5539,7 @@ static TR::Register 
*inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) // index = index + offset, if we need to return unmatched index, then we are done here generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, returnReg, indexReg, returnReg); - if (!node->isArrayCmpLen()) + if (!isArrayCmpLen) { generateTrg1Src2Instruction(cg, TR::InstOpCode::lbzx, node, tempReg, returnReg, src1AddrReg); generateTrg1Src2Instruction(cg, TR::InstOpCode::lbzx, node, indexReg, returnReg, src2AddrReg); @@ -5535,6 +5551,10 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, returnReg, tempReg, 2, 3); generateTrg1Src2Instruction(cg, TR::InstOpCode::add, node, returnReg, returnReg, tempReg); } + else if (!is64bit) + { + generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, temp2Reg, 0); + } int32_t numRegs = 9; @@ -5555,19 +5575,35 @@ static TR::Register *inlineArrayCmpP10(TR::Node *node, TR::CodeGenerator *cg) generateDepLabelInstruction(cg, TR::InstOpCode::label, node, endLabel, dependencies); endLabel->setEndInternalControlFlow(); - node->setRegister(returnReg); + if (isArrayCmpLen && !is64bit) + { + TR::Register *lowReturnReg = returnReg; + returnReg = cg->allocateRegisterPair(returnReg, temp2Reg); + node->setRegister(returnReg); + TR::Register *liveRegs[4] = { src1AddrReg, src2AddrReg, lowReturnReg, temp2Reg }; + dependencies->stopUsingDepRegs(cg, 4, liveRegs); + cg->stopUsingRegister(pairReg); + } + else + { + node->setRegister(returnReg); + TR::Register *liveRegs[3] = { src1AddrReg, src2AddrReg, returnReg }; + dependencies->stopUsingDepRegs(cg, 3, liveRegs); + } cg->decReferenceCount(src1AddrNode); cg->decReferenceCount(src2AddrNode); cg->decReferenceCount(lengthNode); - TR::Register *liveRegs[3] = { src1AddrReg, src2AddrReg, returnReg }; - dependencies->stopUsingDepRegs(cg, 3, liveRegs); return returnReg; -} + } -static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) 
+static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg, bool isArrayCmpLen) { + static char *disableP10ArrayCmp = feGetEnv("TR_DisableP10ArrayCmp"); + if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10) && (disableP10ArrayCmp == NULL)) + return inlineArrayCmpP10(node, cg, isArrayCmpLen); + TR::Node *src1AddrNode = node->getChild(0); TR::Node *src2AddrNode = node->getChild(1); TR::Node *lengthNode = node->getChild(2); @@ -5593,11 +5629,22 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) TR::Register *src1AddrReg = cg->gprClobberEvaluate(src1AddrNode); TR::Register *src2AddrReg = cg->gprClobberEvaluate(src2AddrNode); - byteLen = 4; - if (cg->comp()->target().is64Bit()) + bool is64bit = cg->comp()->target().is64Bit(); + + if (is64bit) + { byteLen = 8; + } + else + { + byteLen = 4; + } byteLenRegister = cg->evaluate(lengthNode); + if (isArrayCmpLen && !is64bit) + { + byteLenRegister = byteLenRegister->getLowOrder(); + } byteLenRemainingRegister = cg->allocateRegister(TR_GPR); tempReg = cg->allocateRegister(TR_GPR); @@ -5613,13 +5660,20 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) condReg2 = cg->allocateRegister(TR_CCR); mid2Label = generateLabelSymbol(cg); - generateTrg1Src1ImmInstruction(cg, (byteLen == 8) ? TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpi4, node, condReg2, byteLenRemainingRegister, byteLen); + generateTrg1Src1ImmInstruction(cg, (is64bit && isArrayCmpLen) ? TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpli4, node, condReg2, byteLenRemainingRegister, byteLen); generateConditionalBranchInstruction(cg, TR::InstOpCode::blt, node, mid2Label, condReg2); generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi2, node, src1AddrReg, src1AddrReg, -1*byteLen); generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi2, node, src2AddrReg, src2AddrReg, -1*byteLen); - generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::srawi, node, tempReg, byteLenRemainingRegister, (byteLen == 8) ? 
3 : 2); + if (is64bit && isArrayCmpLen) + { + generateShiftRightLogicalImmediateLong(cg, node, tempReg, byteLenRemainingRegister, (byteLen == 8) ? 3 : 2); + } + else + { + generateShiftRightLogicalImmediate(cg, node, tempReg, byteLenRemainingRegister, (byteLen == 8) ? 3 : 2); + } generateSrc1Instruction(cg, TR::InstOpCode::mtctr, node, tempReg); loopStartLabel = generateLabelSymbol(cg); @@ -5646,7 +5700,21 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) generateTrg1MemInstruction (cg, TR::InstOpCode::ldu, node, src2Reg, TR::MemoryReference::createWithDisplacement(cg, src2AddrReg, 8, 8)); } - TR::Register *ccReg = cg->allocateRegister(TR_GPR); + TR::Register *ccReg = nullptr; + TR::Register *lowReturnReg = nullptr; + TR::Register *highReturnReg = nullptr; + + if (!is64bit && isArrayCmpLen) + { + lowReturnReg = cg->allocateRegister(TR_GPR); + highReturnReg = cg->allocateRegister(TR_GPR); + ccReg = cg->allocateRegisterPair(lowReturnReg, highReturnReg); + } + else + { + ccReg = cg->allocateRegister(TR_GPR); + } + generateTrg1Src2Instruction(cg, (byteLen == 8) ? TR::InstOpCode::cmp8 : TR::InstOpCode::cmp4, node, condReg, src1Reg, src2Reg); generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, node, residueStartLabel, condReg); @@ -5660,12 +5728,17 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) generateTrg1Instruction(cg, TR::InstOpCode::mfctr, node, byteLenRemainingRegister); - generateTrg1Src1ImmInstruction(cg, (byteLen == 8) ? TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpi4, node, condReg2, byteLenRemainingRegister, 0); + generateTrg1Src1ImmInstruction(cg, (is64bit && isArrayCmpLen) ? TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpli4, node, condReg2, byteLenRemainingRegister, 0); generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, byteLenRemainingRegister, byteLenRemainingRegister, tempReg); - generateShiftLeftImmediate(cg, node, byteLenRemainingRegister, byteLenRemainingRegister, (byteLen == 8) ? 
3 : 2); + + if (is64bit && isArrayCmpLen) + generateShiftLeftImmediateLong(cg, node, byteLenRemainingRegister, byteLenRemainingRegister, (byteLen == 8) ? 3 : 2); + else + generateShiftLeftImmediate(cg, node, byteLenRemainingRegister, byteLenRemainingRegister, (byteLen == 8) ? 3 : 2); + generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, node, midLabel, condReg2); - generateTrg1Src2Instruction(cg, (byteLen == 8) ? TR::InstOpCode::cmp8 : TR::InstOpCode::cmp4, node, condReg2, byteLenRemainingRegister, byteLenRegister); + generateTrg1Src2Instruction(cg, (is64bit && isArrayCmpLen) ? TR::InstOpCode::cmp8 : TR::InstOpCode::cmpl4, node, condReg2, byteLenRemainingRegister, byteLenRegister); generateLabelInstruction(cg, TR::InstOpCode::label, node, midLabel); generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, byteLenRemainingRegister, byteLenRemainingRegister, byteLenRegister); generateLabelInstruction(cg, TR::InstOpCode::label, node, mid2Label); @@ -5691,11 +5764,21 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) generateLabelInstruction(cg, TR::InstOpCode::label, node, resultLabel); - if (node->isArrayCmpLen()) - generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, ccReg, byteLenRemainingRegister, byteLenRegister); + if (isArrayCmpLen) + { + if (is64bit) + { + generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, ccReg, byteLenRemainingRegister, byteLenRegister); + } + else + { + generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, node, lowReturnReg, byteLenRemainingRegister, byteLenRegister); + generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, highReturnReg, 0); + } + } else { - generateTrg1Src1ImmInstruction(cg, (byteLen == 8) ? 
TR::InstOpCode::cmpi8 : TR::InstOpCode::cmpi4, node, condReg2, byteLenRemainingRegister, 0); + generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::cmpli4, node, condReg2, byteLenRemainingRegister, 0); generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, node, result2Label, condReg2); generateTrg1ImmInstruction(cg, TR::InstOpCode::li, node, ccReg, 0); generateLabelInstruction(cg, TR::InstOpCode::b, node, residueEndLabel); @@ -5706,6 +5789,10 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) } int32_t numRegs = 10; + if (!is64bit && isArrayCmpLen) + { + numRegs = 11; + } TR::RegisterDependencyConditions *dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numRegs, cg->trMemory()); dependencies->addPostCondition(src1Reg, TR::RealRegister::NoReg); @@ -5715,7 +5802,15 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) dependencies->addPostCondition(byteLenRegister, TR::RealRegister::NoReg); dependencies->addPostCondition(byteLenRemainingRegister, TR::RealRegister::NoReg); dependencies->addPostCondition(tempReg, TR::RealRegister::NoReg); - dependencies->addPostCondition(ccReg, TR::RealRegister::NoReg); + if (!is64bit && isArrayCmpLen) + { + dependencies->addPostCondition(lowReturnReg, TR::RealRegister::NoReg); + dependencies->addPostCondition(highReturnReg, TR::RealRegister::NoReg); + } + else + { + dependencies->addPostCondition(ccReg, TR::RealRegister::NoReg); + } dependencies->addPostCondition(condReg, TR::RealRegister::NoReg); dependencies->addPostCondition(condReg2, TR::RealRegister::NoReg); @@ -5747,11 +5842,12 @@ static TR::Register *inlineArrayCmp(TR::Node *node, TR::CodeGenerator *cg) TR::Register *OMR::Power::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - TR::Compilation *comp = cg->comp(); - static char *disableP10ArrayCmp = feGetEnv("TR_DisableP10ArrayCmp"); - if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10) && !disableP10ArrayCmp) 
- return inlineArrayCmpP10(node, cg); - return inlineArrayCmp(node, cg); + return inlineArrayCmp(node, cg, false); + } + +TR::Register *OMR::Power::TreeEvaluator::arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + return inlineArrayCmp(node, cg, true); } bool OMR::Power::TreeEvaluator::stopUsingCopyReg( diff --git a/compiler/p/codegen/OMRTreeEvaluator.hpp b/compiler/p/codegen/OMRTreeEvaluator.hpp index 19b8047ff59..8bdda32915b 100644 --- a/compiler/p/codegen/OMRTreeEvaluator.hpp +++ b/compiler/p/codegen/OMRTreeEvaluator.hpp @@ -708,6 +708,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateAndTestEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *compareIntsForEquality(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *compareIntsForEquality(TR::InstOpCode::Mnemonic branchOp, TR::LabelSymbol *dstLabel, TR::Node *node, TR::CodeGenerator *cg, bool isHint=false, bool likeliness=false); diff --git a/compiler/ras/Debug.cpp b/compiler/ras/Debug.cpp index c9c5eb764cb..c23657a3b5f 100644 --- a/compiler/ras/Debug.cpp +++ b/compiler/ras/Debug.cpp @@ -1053,7 +1053,6 @@ TR_Debug::nodePrintAllFlags(TR::Node *node, TR_PrettyPrinterString &output) FLAG(chkSkipZeroInitialization, "skipZeroInit"); FLAG(chkDontMoveUnderBranch, "dontMoveUnderBranch"); FLAG(chkIsPrivatizedInlinerArg, "privatizedInlinerArg"); - FLAG(chkArrayCmpLen, "arrayCmpLen"); FLAG(chkArrayCmpSign, "arrayCmpSign"); FLAG(chkXorBitOpMem, "SubOp=XOR"); FLAG(chkOrBitOpMem, "SubOp=OR"); @@ -1612,6 +1611,8 @@ TR_Debug::getName(TR::SymbolReference * symRef) return ""; case TR::SymbolReferenceTable::arrayCmpSymbol: return ""; + case TR::SymbolReferenceTable::arrayCmpLenSymbol: + return 
""; case TR::SymbolReferenceTable::currentTimeMaxPrecisionSymbol: return ""; case TR::SymbolReferenceTable::encodeASCIISymbol: diff --git a/compiler/riscv/codegen/OMRTreeEvaluator.cpp b/compiler/riscv/codegen/OMRTreeEvaluator.cpp index e5449c4e775..6fc942814d9 100644 --- a/compiler/riscv/codegen/OMRTreeEvaluator.cpp +++ b/compiler/riscv/codegen/OMRTreeEvaluator.cpp @@ -3028,6 +3028,13 @@ OMR::RV::TreeEvaluator::arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); } +TR::Register * +OMR::RV::TreeEvaluator::arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + // TODO:RV: Enable TR::TreeEvaluator::arraycmplenEvaluator in compiler/aarch64/codegen/TreeEvaluatorTable.hpp when Implemented. + return OMR::RV::TreeEvaluator::unImpOpEvaluator(node, cg); + } + TR::Register * OMR::RV::TreeEvaluator::arraycopyEvaluator(TR::Node *node, TR::CodeGenerator *cg) { diff --git a/compiler/riscv/codegen/OMRTreeEvaluator.hpp b/compiler/riscv/codegen/OMRTreeEvaluator.hpp index 5f90a6e6614..6ed726321b9 100644 --- a/compiler/riscv/codegen/OMRTreeEvaluator.hpp +++ b/compiler/riscv/codegen/OMRTreeEvaluator.hpp @@ -504,6 +504,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *long2StringEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *bitOpMemEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *allocationFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *loadFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *storeFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/x/codegen/OMRCodeGenerator.cpp b/compiler/x/codegen/OMRCodeGenerator.cpp index 765d3b2a764..9c0a807c444 100644 --- 
a/compiler/x/codegen/OMRCodeGenerator.cpp +++ b/compiler/x/codegen/OMRCodeGenerator.cpp @@ -380,6 +380,7 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp) if (!TR::Compiler->om.canGenerateArraylets()) { self()->setSupportsArrayCmp(); + self()->setSupportsArrayCmpLen(); self()->setSupportsPrimitiveArrayCopy(); if (!comp->getOption(TR_DisableArraySetOpts)) { diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index 985912a861f..cfab85388f8 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -1117,9 +1117,18 @@ OMR::X86::TreeEvaluator::arraycmpEvaluator( TR::Node *node, TR::CodeGenerator *cg) { - return node->isArrayCmpLen() ? TR::TreeEvaluator::SSE2ArraycmpLenEvaluator(node, cg) : TR::TreeEvaluator::SSE2ArraycmpEvaluator(node, cg); + return TR::TreeEvaluator::SSE2ArraycmpEvaluator(node, cg); } +TR::Register * +OMR::X86::TreeEvaluator::arraycmplenEvaluator( + TR::Node *node, + TR::CodeGenerator *cg) + { + return TR::TreeEvaluator::SSE2ArraycmpLenEvaluator(node, cg); + } + + TR::Register *OMR::X86::TreeEvaluator::SSE2ArraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg) { TR::Node *s1AddrNode = node->getChild(0); @@ -1273,7 +1282,8 @@ TR::Register *OMR::X86::TreeEvaluator::SSE2ArraycmpLenEvaluator(TR::Node *node, TR::Register *s1Reg = cg->gprClobberEvaluate(s1AddrNode, TR::InstOpCode::MOVRegReg()); TR::Register *s2Reg = cg->gprClobberEvaluate(s2AddrNode, TR::InstOpCode::MOVRegReg()); - TR::Register *strLenReg = cg->gprClobberEvaluate(lengthNode, TR::InstOpCode::MOVRegReg()); + TR::Register *strLenReg = cg->longClobberEvaluate(lengthNode); + TR::Register *highReg = NULL; TR::Register *equalTestReg = cg->allocateRegister(TR_GPR); TR::Register *s2ByteReg = cg->allocateRegister(TR_GPR); TR::Register *byteCounterReg = cg->allocateRegister(TR_GPR); @@ -1284,6 +1294,14 @@ TR::Register *OMR::X86::TreeEvaluator::SSE2ArraycmpLenEvaluator(TR::Node *node, TR::Machine 
*machine = cg->machine(); + if (cg->comp()->target().is32Bit() && strLenReg->getRegisterPair()) + { + // The high 32 bits will all be zero, so keep the pair's high-order register to zero-extend the final result. It must be read before strLenReg is narrowed below, because the low-order register is not itself a pair. NOTE(review): highReg now stays live across the compare loop; confirm it needs no post-condition at doneLabel. + highReg = strLenReg->getHighOrder(); + // On 32-bit, the length is guaranteed to fit into the bottom 32 bits + strLenReg = strLenReg->getLowOrder(); + } + generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, resultReg, 0, cg); generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg); generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, qwordCounterReg, strLenReg, cg); @@ -1350,6 +1368,17 @@ TR::Register *OMR::X86::TreeEvaluator::SSE2ArraycmpLenEvaluator(TR::Node *node, deps->addPostCondition(s1Reg, TR::RealRegister::NoReg, cg); generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg); + + if (cg->comp()->target().is32Bit()) + { + if (highReg == NULL) + { + highReg = cg->allocateRegister(TR_GPR); + generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, highReg, 0, cg); + } + resultReg = cg->allocateRegisterPair(resultReg, highReg); + } + node->setRegister(resultReg); cg->decReferenceCount(s1AddrNode); diff --git a/compiler/x/codegen/OMRTreeEvaluator.hpp b/compiler/x/codegen/OMRTreeEvaluator.hpp index 206ffc915e6..e112009b8dc 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.hpp +++ b/compiler/x/codegen/OMRTreeEvaluator.hpp @@ -215,6 +215,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *arraysetEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycopyEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *overflowCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register 
*ibyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/x/i386/codegen/OMRCodeGenerator.cpp b/compiler/x/i386/codegen/OMRCodeGenerator.cpp index d262b2d495b..152510f40a1 100644 --- a/compiler/x/i386/codegen/OMRCodeGenerator.cpp +++ b/compiler/x/i386/codegen/OMRCodeGenerator.cpp @@ -81,6 +81,8 @@ OMR::X86::I386::CodeGenerator::initialize() cg->setSupportsArrayTranslateAndTest(); if (debug("supportsArrayCmp")) cg->setSupportsArrayCmp(); + if (debug("supportsArrayCmpLen")) + cg->setSupportsArrayCmpLen(); cg->setSupportsDoubleWordCAS(); cg->setSupportsDoubleWordSet(); diff --git a/compiler/z/codegen/OMRCodeGenerator.cpp b/compiler/z/codegen/OMRCodeGenerator.cpp index cf6685dfb08..cbd487a8108 100644 --- a/compiler/z/codegen/OMRCodeGenerator.cpp +++ b/compiler/z/codegen/OMRCodeGenerator.cpp @@ -503,6 +503,7 @@ OMR::Z::CodeGenerator::initialize() cg->setSupportsArraySet(); } cg->setSupportsArrayCmp(); + cg->setSupportsArrayCmpLen(); cg->setSupportsArrayCmpSign(); if (!comp->compileRelocatableCode()) { diff --git a/compiler/z/codegen/OMRTreeEvaluator.cpp b/compiler/z/codegen/OMRTreeEvaluator.cpp index a29e812beeb..813cc8e5184 100644 --- a/compiler/z/codegen/OMRTreeEvaluator.cpp +++ b/compiler/z/codegen/OMRTreeEvaluator.cpp @@ -11776,131 +11776,145 @@ OMR::Z::TreeEvaluator::arraycmpEvaluator(TR::Node * node, TR::CodeGenerator * cg TR::Register * firstBaseReg = NULL; TR::Register * secondBaseReg = NULL; bool lenMinusOne=false; - TR_ASSERT(!node->isArrayCmpLen() || !node->isArrayCmpSign(), "Invalid arraycmp node"); - if (!node->isArrayCmpLen()) + + // use CLC + TR::Register * resultReg; + + if (elemsExpr->getOpCode().isLoadConst()) { - // use CLC - TR::Register * resultReg; + int64_t elems = static_cast<int64_t>(getIntegralValue(elemsExpr)); //get number of elements (in bytes) + bool clobber = (comp->getOption(TR_DisableSSOpts) || elems>256 || elems==0 || node->isArrayCmpSign()); + if (!node->isArrayCmpSign()) + { + resultReg = 
TR::TreeEvaluator::arraycmpHelper( + node, + cg, + false, //isWideChar + true, //isEqualCmp + 0, //cmpValue. It means it is equivalent to do "arraycmp(A,B) == 0" + NULL, //compareTarget + NULL, //ificmpNode + true, //needResultReg + true); //return102 + // node->setRegister(resultReg); + return resultReg; + } + else + { + MemCmpConstLenSignMacroOp op(node, firstBaseAddr, secondBaseAddr, cg, elems); + ClobberRegisterForLoops(clobber,op,firstBaseAddr,firstBaseReg); + ClobberRegisterForLoops(clobber,op,secondBaseAddr,secondBaseReg); + op.generate(firstBaseReg, secondBaseReg); + resultReg = op.resultReg(); + } + } + else + { + TR::Register * elemsReg; - if (elemsExpr->getOpCode().isLoadConst()) + if (!node->isArrayCmpSign()) { - int32_t elems = (int32_t) getIntegralValue(elemsExpr); //get number of elements (in bytes) - bool clobber = (comp->getOption(TR_DisableSSOpts) || elems>256 || elems==0 || node->isArrayCmpSign()); - if (!node->isArrayCmpSign()) - { - resultReg = TR::TreeEvaluator::arraycmpHelper( - node, - cg, - false, //isWideChar - true, //isEqualCmp - 0, //cmpValue. It means it is equivalent to do "arraycmp(A,B) == 0" - NULL, //compareTarget - NULL, //ificmpNode - true, //needResultReg - true); //return102 - // node->setRegister(resultReg); - return resultReg; - } - else - { - MemCmpConstLenSignMacroOp op(node, firstBaseAddr, secondBaseAddr, cg, elems); - ClobberRegisterForLoops(clobber,op,firstBaseAddr,firstBaseReg); - ClobberRegisterForLoops(clobber,op,secondBaseAddr,secondBaseReg); - op.generate(firstBaseReg, secondBaseReg); - resultReg = op.resultReg(); - } + resultReg = TR::TreeEvaluator::arraycmpHelper( + node, + cg, + false, //isWideChar + true, //isEqualCmp + 0, //cmpValue. 
It means it is equivalent to do "arraycmp(A,B) == 0" + NULL, //compareTarget + NULL, //ificmpNode + true, //needResultReg + true); //return102 + + // node->setRegister(resultReg); + return resultReg; } else { - TR::Register * elemsReg; + elemsReg = cg->evaluateLengthMinusOneForMemoryOps(elemsExpr, true, lenMinusOne); + firstBaseReg = cg->gprClobberEvaluate(firstBaseAddr); + secondBaseReg = cg->gprClobberEvaluate(secondBaseAddr); - if (!node->isArrayCmpSign()) - { - resultReg = TR::TreeEvaluator::arraycmpHelper( - node, - cg, - false, //isWideChar - true, //isEqualCmp - 0, //cmpValue. It means it is equivalent to do "arraycmp(A,B) == 0" - NULL, //compareTarget - NULL, //ificmpNode - true, //needResultReg - true); //return102 - - // node->setRegister(resultReg); - return resultReg; - } - else - { - elemsReg = cg->evaluateLengthMinusOneForMemoryOps(elemsExpr, true, lenMinusOne); - firstBaseReg = cg->gprClobberEvaluate(firstBaseAddr); - secondBaseReg = cg->gprClobberEvaluate(secondBaseAddr); - - MemCmpVarLenSignMacroOp op(node, firstBaseAddr, secondBaseAddr, cg, elemsReg, elemsExpr); - op.generate(firstBaseReg, secondBaseReg); - resultReg = op.resultReg(); - cg->stopUsingRegister(elemsReg); - } + MemCmpVarLenSignMacroOp op(node, firstBaseAddr, secondBaseAddr, cg, elemsReg, elemsExpr); + op.generate(firstBaseReg, secondBaseReg); + resultReg = op.resultReg(); + cg->stopUsingRegister(elemsReg); } + } - cg->decReferenceCount(elemsExpr); - if (firstBaseReg!=NULL) cg->decReferenceCount(firstBaseAddr); - if (secondBaseReg!=NULL) cg->decReferenceCount(secondBaseAddr); + cg->decReferenceCount(elemsExpr); + if (firstBaseReg!=NULL) cg->decReferenceCount(firstBaseAddr); + if (secondBaseReg!=NULL) cg->decReferenceCount(secondBaseAddr); - if (firstBaseReg!=NULL) cg->stopUsingRegister(firstBaseReg); - if (secondBaseReg!=NULL) cg->stopUsingRegister(secondBaseReg); + if (firstBaseReg!=NULL) cg->stopUsingRegister(firstBaseReg); + if (secondBaseReg!=NULL) 
cg->stopUsingRegister(secondBaseReg); - TR_ASSERT( resultReg!=firstBaseReg && resultReg!=secondBaseReg, "arraycmpEvaluator -- result reg should be a new reg\n"); + TR_ASSERT( resultReg!=firstBaseReg && resultReg!=secondBaseReg, "arraycmpEvaluator -- result reg should be a new reg\n"); - node->setRegister(resultReg); - return resultReg; - } - else + node->setRegister(resultReg); + return resultReg; + } + +TR::Register * +OMR::Z::TreeEvaluator::arraycmplenEvaluator(TR::Node * node, TR::CodeGenerator * cg) + { + TR::Compilation *comp = cg->comp(); + + if (TR::isJ9() && !comp->getOption(TR_DisableSIMDArrayCompare) && cg->getSupportsVectorRegisters()) { - // use CLCL instruction - firstBaseReg = cg->gprClobberEvaluate(firstBaseAddr); - secondBaseReg = cg->gprClobberEvaluate(secondBaseAddr); + // An empirical study has showed that CLC is faster for all array sizes if the number of bytes to copy is known to be constant + if (!node->getChild(2)->getOpCode().isLoadConst()) + return TR::TreeEvaluator::arraycmpSIMDHelper(node, cg, NULL, NULL, true, !node->isArrayCmpSign()/*return102*/, true); + } + + TR::Node * firstBaseAddr = node->getFirstChild(); + TR::Node * secondBaseAddr = node->getSecondChild(); + TR::Node * elemsExpr = node->getChild(2); - TR::Register * orgLen = cg->gprClobberEvaluate(elemsExpr); - TR::Register * firstLen = cg->allocateRegister(); - TR::Register * secondLen = cg->allocateRegister(); - TR::RegisterPair * firstPair = cg->allocateConsecutiveRegisterPair(firstLen, firstBaseReg); - TR::RegisterPair * secondPair = cg->allocateConsecutiveRegisterPair(secondLen, secondBaseReg); - TR::Register * resultReg = cg->allocateRegister(); - TR::Instruction * cursor; + TR::Register * firstBaseReg = NULL; + TR::Register * secondBaseReg = NULL; + bool lenMinusOne=false; - TR::RegisterDependencyConditions * dependencies = cg->createDepsForRRMemoryInstructions(node, firstPair, secondPair); + // use CLCL instruction + firstBaseReg = cg->gprClobberEvaluate(firstBaseAddr); + 
secondBaseReg = cg->gprClobberEvaluate(secondBaseAddr); - generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, firstLen, orgLen); - generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, secondLen, orgLen); - cursor = generateRRInstruction(cg, TR::InstOpCode::CLCL, node, firstPair, secondPair); + TR::Register * orgLen = cg->gprClobberEvaluate(elemsExpr); + TR::Register * firstLen = cg->allocateRegister(); + TR::Register * secondLen = cg->allocateRegister(); + TR::RegisterPair * firstPair = cg->allocateConsecutiveRegisterPair(firstLen, firstBaseReg); + TR::RegisterPair * secondPair = cg->allocateConsecutiveRegisterPair(secondLen, secondBaseReg); + TR::Register * resultReg = cg->allocateRegister(); + TR::Instruction * cursor; - generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, resultReg, orgLen); - cursor = generateRRInstruction(cg, TR::InstOpCode::getSubstractRegOpCode(), node, resultReg, firstLen); + TR::RegisterDependencyConditions * dependencies = cg->createDepsForRRMemoryInstructions(node, firstPair, secondPair); + generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, firstLen, orgLen); + generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, secondLen, orgLen); + cursor = generateRRInstruction(cg, TR::InstOpCode::CLCL, node, firstPair, secondPair); - cg->stopUsingRegister(firstPair); - cg->stopUsingRegister(secondPair); - cg->stopUsingRegister(firstBaseReg); - cg->stopUsingRegister(secondBaseReg); - cg->stopUsingRegister(firstLen); - cg->stopUsingRegister(secondLen); - cg->stopUsingRegister(orgLen); + generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, resultReg, orgLen); + cursor = generateRRInstruction(cg, TR::InstOpCode::getSubstractRegOpCode(), node, resultReg, firstLen); - cg->decReferenceCount(elemsExpr); - cg->decReferenceCount(firstBaseAddr); - cg->decReferenceCount(secondBaseAddr); - cursor->setDependencyConditions(dependencies); + cg->stopUsingRegister(firstPair); 
+ cg->stopUsingRegister(secondPair); + cg->stopUsingRegister(firstBaseReg); + cg->stopUsingRegister(secondBaseReg); + cg->stopUsingRegister(firstLen); + cg->stopUsingRegister(secondLen); + cg->stopUsingRegister(orgLen); - node->setRegister(resultReg); - return resultReg; + cg->decReferenceCount(elemsExpr); + cg->decReferenceCount(firstBaseAddr); + cg->decReferenceCount(secondBaseAddr); + cursor->setDependencyConditions(dependencies); - } - } + node->setRegister(resultReg); + return resultReg; + } #define TRTSIZE 256 TR::Register * @@ -15734,7 +15748,8 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, TR::LabelSymbol *compareTarget, TR::Node *ificmpNode, bool needResultReg, - bool return102) + bool return102, + bool isArrayCmpLen) { // Similar to arraycmpHelper, except it uses vector instructions and supports arraycmpsign and arraycmplen // Does not currently support aggregates or wide chars @@ -15746,11 +15761,8 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, TR::Node * secondAddrNode = return102 ? 
node->getSecondChild() : node->getFirstChild(); TR::Node * elemsExpr = node->getChild(2); bool isFoldedIf = compareTarget != NULL; - bool isArrayCmp = node->getOpCodeValue() == TR::arraycmp; TR::Compilation *comp = cg->comp(); - TR_ASSERT( !(isArrayCmp && node->isArrayCmpLen() && node->isArrayCmpSign()), "Invalid arraycmp node"); - TR::InstOpCode::S390BranchCondition ifxcmpBrCond = TR::InstOpCode::COND_NOP; if (isFoldedIf) { @@ -15771,7 +15783,7 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, // VLL uses lastByteIndexReg as the highest 0-based index to load, which is length - 1 generateRILInstruction(cg, TR::InstOpCode::getSubtractLogicalImmOpCode(), node, lastByteIndexReg, 1); - if(needResultReg && isArrayCmp && node->isArrayCmpLen()) + if(needResultReg && isArrayCmpLen) generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, resultReg, lastByteIndexReg); TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg); @@ -15811,7 +15823,7 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, } else if(needResultReg) { - if(isArrayCmp && node->isArrayCmpLen()) + if(isArrayCmpLen) generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, resultReg, 1);//Return length of the arrays, which is resultReg += 1 else generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, resultReg, resultReg);//Return zero to indicate equal @@ -15826,7 +15838,7 @@ OMR::Z::TreeEvaluator::arraycmpSIMDHelper(TR::Node *node, } else if(needResultReg) { - if(isArrayCmp && node->isArrayCmpLen()) + if(isArrayCmpLen) { // Return 0-based index of first non-matching element // resultReg - lastByteIndexReg = number of elements compared before the last loop diff --git a/compiler/z/codegen/OMRTreeEvaluator.hpp b/compiler/z/codegen/OMRTreeEvaluator.hpp index 537f2ce3ab3..06297753eba 100644 --- a/compiler/z/codegen/OMRTreeEvaluator.hpp +++ b/compiler/z/codegen/OMRTreeEvaluator.hpp @@ -1046,7 +1046,8 @@ class OMR_EXTENSIBLE TreeEvaluator: public 
OMR::TreeEvaluator TR::LabelSymbol *compareTarget = NULL, TR::Node *ificmpNode = NULL, bool needResultReg = true, - bool return102 = false); + bool return102 = false, + bool isArrayCmpLen = false); static TR::Register *arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraytranslateEncodeSIMDEvaluator(TR::Node *node, TR::CodeGenerator *cg, ArrayTranslateFlavor convType); @@ -1055,6 +1056,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *long2StringEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *bitOpMemEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *arraycmpEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *arraycmplenEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *BBStartEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *BBEndEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/fvtest/compilertriltest/ArrayTest.cpp b/fvtest/compilertriltest/ArrayTest.cpp index b92119fef46..274f82647f1 100644 --- a/fvtest/compilertriltest/ArrayTest.cpp +++ b/fvtest/compilertriltest/ArrayTest.cpp @@ -42,7 +42,7 @@ class ArraycmpEqualTest : public TRTest::JitTest, public ::testing::WithParamInt */ class ArraycmpNotEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface> {}; -TEST_P(ArraycmpEqualTest, ArraycmpLenSameArray) { +TEST_P(ArraycmpEqualTest, ArraycmpSameArray) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -50,14 +50,12 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenSameArray) { char inputTrees[1024] = {0}; /* * "address=0" parameter is needed for arraycmp opcode because "Call" property is set to the opcode. - * We need "flags=15" parameter to set arrayCmpLen flag. - * arrayCmpLen flag is defined as 0x8000, which is 1 << 15. 
*/ std::snprintf(inputTrees, sizeof(inputTrees), "(method return=Int32 args=[Address, Address]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" " (iconst %d)))))", @@ -73,10 +71,10 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenSameArray) { std::vector s1(length, 0x5c); auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(length, entry_point(&s1[0], &s1[0])); + EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s1[0])); } -TEST_P(ArraycmpEqualTest, ArraycmpLenEqualConstLen) { +TEST_P(ArraycmpEqualTest, ArraycmpEqualConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -86,7 +84,7 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenEqualConstLen) { "(method return=Int32 args=[Address, Address]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" " (iconst %d)))))", @@ -103,10 +101,10 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenEqualConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(length, entry_point(&s1[0], &s2[0])); + EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s2[0])); } -TEST_P(ArraycmpEqualTest, ArraycmpLenEqualVariableLen) { +TEST_P(ArraycmpEqualTest, ArraycmpEqualVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -116,7 +114,7 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenEqualVariableLen) { "(method return=Int32 args=[Address, Address, Int32]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" " (iload parm=2)))))" @@ -132,14 +130,17 @@ TEST_P(ArraycmpEqualTest, ArraycmpLenEqualVariableLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); auto 
entry_point = compiler.getEntryPoint(); - EXPECT_EQ(length, entry_point(&s1[0], &s2[0], length)); + EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s2[0], length)); } -TEST_P(ArraycmpEqualTest, ArraycmpSameArray) { +INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpEqualTest, ::testing::Range(1, 128)); + +TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = GetParam(); + auto length = std::get<0>(GetParam()); + auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), "(method return=Int32 args=[Address, Address]" @@ -160,25 +161,28 @@ TEST_P(ArraycmpEqualTest, ArraycmpSameArray) { ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; std::vector s1(length, 0x5c); + std::vector s2(length, 0x5c); + s1[offset] = 0x81; + auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s1[0])); + EXPECT_EQ(returnValueForArraycmpGreaterThan, entry_point(&s1[0], &s2[0])); } -TEST_P(ArraycmpEqualTest, ArraycmpEqualConstLen) { +TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = GetParam(); + auto length = std::get<0>(GetParam()); + auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address]" + "(method return=Int32 args=[Address, Address, Int32]" " (block" " (ireturn" " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iconst %d)))))", - length + " (iload parm=2)))))" ); auto trees = parseString(inputTrees); @@ -190,24 +194,28 @@ TEST_P(ArraycmpEqualTest, ArraycmpEqualConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - auto entry_point = compiler.getEntryPoint(); - 
EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s2[0])); + s1[offset] = 0x81; + + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(returnValueForArraycmpGreaterThan, entry_point(&s1[0], &s2[0], length)); } -TEST_P(ArraycmpEqualTest, ArraycmpEqualVariableLen) { +TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = GetParam(); + auto length = std::get<0>(GetParam()); + auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address, Int32]" + "(method return=Int32 args=[Address, Address]" " (block" " (ireturn" " (arraycmp address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iload parm=2)))))" + " (iconst %d)))))", + length ); auto trees = parseString(inputTrees); @@ -219,13 +227,13 @@ TEST_P(ArraycmpEqualTest, ArraycmpEqualVariableLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpEqual, entry_point(&s1[0], &s2[0], length)); -} + s1[offset] = 0x21; -INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpEqualTest, ::testing::Range(1, 128)); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(returnValueForArraycmpLessThan, entry_point(&s1[0], &s2[0])); +} -TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualConstLen) { +TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -233,14 +241,13 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualConstLen) { auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address]" + "(method return=Int32 args=[Address, Address, Int32]" " (block" " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (arraycmp 
address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iconst %d)))))", - length + " (iload parm=2)))))" ); auto trees = parseString(inputTrees); @@ -252,27 +259,74 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x3f; + s1[offset] = 0x21; - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(offset, entry_point(&s1[0], &s2[0])); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(returnValueForArraycmpLessThan, entry_point(&s1[0], &s2[0], length)); } -TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualVariableLen) { +template +static std::vector> createArraycmpNotEqualParam() { + std::vector> v; + /* Small arrays */ + for (int i = 1; i < 32; i++) { + for (int j = 0; j < i; j++) { + v.push_back(std::make_tuple(i, j)); + } + } + /* Variation of the offset of mismatched element in 128 bytes array */ + for (int i = 0; i < 128; i++) { + v.push_back(std::make_tuple(128, i)); + } + /* Medium size arrays with the mismatched element near the end of the arrays */ + for (int i = 120; i < 136; i++) { + for (int j = 96; j < i; j++) { + v.push_back(std::make_tuple(i, j)); + } + } + /* A large size array with the mismatched element near the end of the array */ + for (int i = 4000; i < 4096; i++) { + v.push_back(std::make_tuple(4096, i)); + } + return v; +} +INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam())); + + +/** + * @brief TestFixture class for arraycmplen test + * + * @details Used for arraycmplen test with the arrays with same data. + * The parameter is the length parameter for the arraycmp evaluator. + */ +class ArraycmplenEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface {}; +/** + * @brief TestFixture class for arraycmplen test + * + * @details Used for arraycmplen test which has mismatched element. 
+ * The first parameter is the length parameter for the arraycmp evaluator. + * The second parameter is the offset of the mismatched element in the arrays. + */ +class ArraycmplenNotEqualTest : public TRTest::JitTest, public ::testing::WithParamInterface> {}; + +TEST_P(ArraycmplenEqualTest, ArraycmpLenSameArray) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = std::get<0>(GetParam()); - auto offset = std::get<1>(GetParam()); + auto length = GetParam(); char inputTrees[1024] = {0}; + /* + * "address=0" parameter is needed for arraycmp opcode because "Call" property is set to the opcode. + */ std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address, Int32]" + "(method return=Int64 args=[Address, Address]" " (block" - " (ireturn" - " (arraycmp address=0 args=[Address, Address] flags=[15]" + " (lreturn" + " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iload parm=2)))))" + " (lconst %" OMR_PRId64 ")))))", + length ); auto trees = parseString(inputTrees); @@ -283,28 +337,24 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLenNotEqualVariableLen) { ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees; std::vector s1(length, 0x5c); - std::vector s2(length, 0x5c); - s1[offset] = 0x3f; - - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(offset, entry_point(&s1[0], &s2[0], length)); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(length, entry_point(&s1[0], &s1[0])); } -TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanConstLen) { +TEST_P(ArraycmplenEqualTest, ArraycmpLenEqualConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = std::get<0>(GetParam()); - auto offset = std::get<1>(GetParam()); + auto length = GetParam(); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address]" + 
"(method return=Int64 args=[Address, Address]" " (block" - " (ireturn" - " (arraycmp address=0 args=[Address, Address]" + " (lreturn" + " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iconst %d)))))", + " (lconst %" OMR_PRId64 ")))))", length ); auto trees = parseString(inputTrees); @@ -317,27 +367,24 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x81; - - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpGreaterThan, entry_point(&s1[0], &s2[0])); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(length, entry_point(&s1[0], &s2[0])); } -TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanVariableLen) { +TEST_P(ArraycmplenEqualTest, ArraycmpLenEqualVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); - auto length = std::get<0>(GetParam()); - auto offset = std::get<1>(GetParam()); + auto length = GetParam(); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address, Int32]" + "(method return=Int64 args=[Address, Address, Int64]" " (block" - " (ireturn" - " (arraycmp address=0 args=[Address, Address]" + " (lreturn" + " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iload parm=2)))))" + " (lload parm=2)))))" ); auto trees = parseString(inputTrees); @@ -349,13 +396,13 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpGreaterThanVariableLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x81; - - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpGreaterThan, entry_point(&s1[0], &s2[0], length)); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(length, entry_point(&s1[0], &s2[0], length)); } -TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanConstLen) { +INSTANTIATE_TEST_CASE_P(ArraycmplenTest, 
ArraycmplenEqualTest, ::testing::Range(static_cast(1), static_cast(128))); + +TEST_P(ArraycmplenNotEqualTest, ArraycmpLenNotEqualConstLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -363,13 +410,13 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanConstLen) { auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address]" + "(method return=Int64 args=[Address, Address]" " (block" - " (ireturn" - " (arraycmp address=0 args=[Address, Address]" + " (lreturn" + " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iconst %d)))))", + " (lconst %" OMR_PRId64 ")))))", length ); auto trees = parseString(inputTrees); @@ -382,13 +429,13 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanConstLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x21; + s1[offset] = 0x3f; - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpLessThan, entry_point(&s1[0], &s2[0])); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(offset, entry_point(&s1[0], &s2[0])); } -TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanVariableLen) { +TEST_P(ArraycmplenNotEqualTest, ArraycmpLenNotEqualVariableLen) { SKIP_ON_ARM(MissingImplementation); SKIP_ON_RISCV(MissingImplementation); @@ -396,13 +443,13 @@ TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanVariableLen) { auto offset = std::get<1>(GetParam()); char inputTrees[1024] = {0}; std::snprintf(inputTrees, sizeof(inputTrees), - "(method return=Int32 args=[Address, Address, Int32]" + "(method return=Int64 args=[Address, Address, Int64]" " (block" - " (ireturn" - " (arraycmp address=0 args=[Address, Address]" + " (lreturn" + " (arraycmplen address=0 args=[Address, Address]" " (aload parm=0)" " (aload parm=1)" - " (iload parm=2)))))" + " (lload parm=2)))))" ); auto trees = parseString(inputTrees); @@ -414,34 +461,10 @@ 
TEST_P(ArraycmpNotEqualTest, ArraycmpLessThanVariableLen) { std::vector s1(length, 0x5c); std::vector s2(length, 0x5c); - s1[offset] = 0x21; + s1[offset] = 0x3f; - auto entry_point = compiler.getEntryPoint(); - EXPECT_EQ(returnValueForArraycmpLessThan, entry_point(&s1[0], &s2[0], length)); + auto entry_point = compiler.getEntryPoint(); + EXPECT_EQ(offset, entry_point(&s1[0], &s2[0], length)); } -static std::vector> createArraycmpNotEqualParam() { - std::vector> v; - /* Small arrays */ - for (int i = 1; i < 32; i++) { - for (int j = 0; j < i; j++) { - v.push_back(std::make_tuple(i, j)); - } - } - /* Variation of the offset of mismatched element in 128 bytes array */ - for (int i = 0; i < 128; i++) { - v.push_back(std::make_tuple(128, i)); - } - /* Medium size arrays with the mismatched element near the end of the arrays */ - for (int i = 120; i < 136; i++) { - for (int j = 96; j < i; j++) { - v.push_back(std::make_tuple(i, j)); - } - } - /* A large size array with the mismatched element near the end of the array */ - for (int i = 4000; i < 4096; i++) { - v.push_back(std::make_tuple(4096, i)); - } - return v; -} -INSTANTIATE_TEST_CASE_P(ArraycmpTest, ArraycmpNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam())); +INSTANTIATE_TEST_CASE_P(ArraycmplenTest, ArraycmplenNotEqualTest, ::testing::ValuesIn(createArraycmpNotEqualParam()));