Skip to content

Commit

Permalink
Merge pull request #7113 from BradleyWood/vbitselect
Browse files Browse the repository at this point in the history
x86: Implement vbitselect evaluator
  • Loading branch information
0xdaryl authored Sep 21, 2023
2 parents 47af8aa + 12fbec5 commit 0ed8736
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 16 deletions.
6 changes: 0 additions & 6 deletions compiler/x/amd64/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1558,12 +1558,6 @@ OMR::X86::AMD64::TreeEvaluator::vcalliEvaluator(TR::Node *node, TR::CodeGenerato
return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
}

// AMD64-level evaluator entry for vbitselect nodes.
// Does no work of its own: forwards node and cg unchanged to
// TR::TreeEvaluator::unImpOpEvaluator and returns that call's result.
// NOTE(review): unImpOpEvaluator is presumably the "opcode not implemented"
// handler, judging by its name -- confirm against its definition.
TR::Register*
OMR::X86::AMD64::TreeEvaluator::vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
}

TR::Register*
OMR::X86::AMD64::TreeEvaluator::vcastEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
Expand Down
1 change: 0 additions & 1 deletion compiler/x/amd64/codegen/OMRTreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,6 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::X86::TreeEvaluator
static TR::Register *vreturnEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vRegLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vRegStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vsetelemEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vsplatsEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg);
Expand Down
3 changes: 3 additions & 0 deletions compiler/x/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,9 @@ bool OMR::X86::CodeGenerator::getSupportsOpCodeForAutoSIMD(TR::CPU *cpu, TR::ILO
return false;
}
break;
case TR::vbitselect:
if (et.isFloatingPoint()) return false;
// Intentional fallthrough
case TR::vneg:
switch (ot.getVectorLength()) {
case TR::VectorLength128:
Expand Down
53 changes: 53 additions & 0 deletions compiler/x/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6212,6 +6212,59 @@ OMR::X86::TreeEvaluator::vmbyteswapEvaluator(TR::Node *node, TR::CodeGenerator *
return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
}

/**
 * Evaluates a vbitselect node for integral vector element types.
 *
 * With the three children named A, B and C (first, second, third), the
 * generated sequence computes, per bit:
 *
 *    A ^ ((A ^ B) & C)
 *
 * i.e. where a bit of C is 0 the result takes the bit from A, and where it
 * is 1 the result takes the bit from B.
 *
 * @param node the vbitselect node; its data type supplies the vector element
 *             type and vector length
 * @param cg   the code generator used to evaluate children, allocate the
 *             result register and emit instructions
 * @return a newly allocated TR_VRF register holding the result; it is also
 *         set as the node's register
 *
 * Fatal assertions fire if the element type is not integral or if any of the
 * opcodes used has no encoding (OMR::X86::Bad) for the target CPU and vector
 * length.
 */
TR::Register*
OMR::X86::TreeEvaluator::vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
   TR::DataType et = node->getDataType().getVectorElementType();
   TR::VectorLength vl = node->getDataType().getVectorLength();

   // Validate the element type before evaluating children or allocating registers.
   TR_ASSERT_FATAL(et.isIntegral(), "vbitselect is for integer operations");

   TR::Node *firstChild = node->getFirstChild();
   TR::Node *secondChild = node->getSecondChild();
   TR::Node *thirdChild = node->getThirdChild();

   TR::Register *firstReg = cg->evaluate(firstChild);
   TR::Register *secondReg = cg->evaluate(secondChild);
   TR::Register *thirdReg = cg->evaluate(thirdChild);
   TR::Register *resultReg = cg->allocateRegister(TR_VRF);

   TR::InstOpCode xorOpcode = TR::InstOpCode::PXORRegReg;
   TR::InstOpCode andOpcode = TR::InstOpCode::PANDRegReg;

   // Each opcode is queried for its own encoding so the assertions below
   // actually validate the instruction they name.
   OMR::X86::Encoding xorEncoding = xorOpcode.getSIMDEncoding(&cg->comp()->target().cpu, vl);
   OMR::X86::Encoding andEncoding = andOpcode.getSIMDEncoding(&cg->comp()->target().cpu, vl);

   TR_ASSERT_FATAL(xorEncoding != OMR::X86::Bad, "No encoding method for pxor opcode");
   TR_ASSERT_FATAL(andEncoding != OMR::X86::Bad, "No encoding method for pand opcode");

   // inputA[i] ^ ((inputA[i] ^ inputB[i]) & inputC[i])

   if (xorEncoding != OMR::X86::Legacy)
      {
      // Three-operand form: resultReg = firstReg ^ secondReg, sources untouched.
      generateRegRegRegInstruction(xorOpcode.getMnemonic(), node, resultReg, firstReg, secondReg, cg, xorEncoding);
      }
   else
      {
      // Legacy encoding is two-operand (destination is also a source), so
      // copy firstReg into resultReg first to avoid overwriting a child's register.
      TR::InstOpCode movOpcode = TR::InstOpCode::MOVDQURegReg;
      OMR::X86::Encoding movEncoding = movOpcode.getSIMDEncoding(&cg->comp()->target().cpu, vl);

      TR_ASSERT_FATAL(movEncoding != OMR::X86::Bad, "No encoding method for movdqu opcode");
      generateRegRegInstruction(movOpcode.getMnemonic(), node, resultReg, firstReg, cg, movEncoding);
      generateRegRegInstruction(xorOpcode.getMnemonic(), node, resultReg, secondReg, cg, xorEncoding);
      }

   // resultReg = (A ^ B) & C
   generateRegRegInstruction(andOpcode.getMnemonic(), node, resultReg, thirdReg, cg, andEncoding);
   // resultReg = A ^ ((A ^ B) & C)
   generateRegRegInstruction(xorOpcode.getMnemonic(), node, resultReg, firstReg, cg, xorEncoding);

   node->setRegister(resultReg);
   cg->decReferenceCount(firstChild);
   cg->decReferenceCount(secondChild);
   cg->decReferenceCount(thirdChild);

   return resultReg;
}

TR::Register*
OMR::X86::TreeEvaluator::vcompressbitsEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
Expand Down
1 change: 1 addition & 0 deletions compiler/x/codegen/OMRTreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator
static TR::Register *vmbitswapEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vbyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vmbyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vcompressbitsEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vmcompressbitsEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vexpandbitsEvaluator(TR::Node *node, TR::CodeGenerator *cg);
Expand Down
6 changes: 0 additions & 6 deletions compiler/x/i386/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1637,12 +1637,6 @@ OMR::X86::I386::TreeEvaluator::vcalliEvaluator(TR::Node *node, TR::CodeGenerator
return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
}

// I386-level evaluator entry for vbitselect nodes.
// Does no work of its own: forwards node and cg unchanged to
// TR::TreeEvaluator::unImpOpEvaluator and returns that call's result.
// NOTE(review): unImpOpEvaluator is presumably the "opcode not implemented"
// handler, judging by its name -- confirm against its definition.
TR::Register*
OMR::X86::I386::TreeEvaluator::vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
}

TR::Register*
OMR::X86::I386::TreeEvaluator::vcastEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
Expand Down
1 change: 0 additions & 1 deletion compiler/x/i386/codegen/OMRTreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,6 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::X86::TreeEvaluator
static TR::Register *vreturnEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vRegLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vRegStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vbitselectEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vsetelemEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vsplatsEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *vstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg);
Expand Down
2 changes: 0 additions & 2 deletions fvtest/compilertriltest/VectorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,8 +773,6 @@ TEST_F(VectorTest, VInt8BitSelect) {
SKIP_ON_S390(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)";
SKIP_ON_S390X(KnownBug) << "This test is currently disabled on Z platforms because not all Z platforms have vector support (issue #1843)";
SKIP_ON_RISCV(MissingImplementation);
SKIP_ON_X86(MissingImplementation);
SKIP_ON_HAMMER(MissingImplementation);

Tril::DefaultCompiler compiler(trees);
ASSERT_EQ(0, compiler.compile()) << "Compilation failed unexpectedly\n" << "Input trees: " << inputTrees;
Expand Down

0 comments on commit 0ed8736

Please sign in to comment.