From 80928acb92d1e4b6db16bada6d21b1fb6fa66265 Mon Sep 17 00:00:00 2001 From: Arseny Kapoulkine Date: Wed, 21 Feb 2024 07:06:11 -0800 Subject: [PATCH] CodeGen: Extract all vector tag patching into TAG_VECTOR (#1171) Instead of patching the tag component with TVECTOR in every instruction that produces a vector value, we now use a separate IR instruction to do this. This reduces implementation redundancy, but more importantly allows for a class of optimizations: - NUM_TO_VECTOR previously patched the component unconditionally but the result was used only in MUL/DIV_VEC instructions that ignore it anyway; we can now remove this. - ADD_VEC et al can now forward the source of TAG_VECTOR instruction of either input; this shortens the latency chain and in the future could allow us to generate an optimal vector instruction sequence once the temporary stores are marked as dead. - In the future on X64, ADD_VEC et al will be able to analyze the input instruction and remove tag masking conditionally. This is not part of this PR as it requires a decision around expected FP environment and/or the necessity of the existing masking to begin with. I've also renamed NUM_TO_VECTOR to NUM_TO_VEC so that "VEC" always refers to "3 float values" and for consistency with ADD/etc. Note: ADD_VEC input forwarding is currently performed unconditionally; it may or may not increase the spills that can't be reloaded from the stack. On A64 this makes the Taylor series computation a tiny bit faster (11.3ns => 11.0ns) as it removes the redundant ins instructions along the NUM_TO_VEC path. Curiously, the optimization of forwarding TAG_VECTOR input to arithmetic instructions actually has a small penalty: without it, this PR runs at 10.9 ns. I don't know if this is a property of the benchmark though, as I just noticed that in this benchmark type inference actually fails to infer parts of the computation as a vector op. 
If desired I will happily omit this part of the change and we can explore that separately. --- CodeGen/include/Luau/IrData.h | 6 ++- CodeGen/include/Luau/IrUtils.h | 3 +- CodeGen/src/IrDump.cpp | 6 ++- CodeGen/src/IrLoweringA64.cpp | 66 +++++++++++++++++++------ CodeGen/src/IrLoweringX64.cpp | 34 ++++++++++--- CodeGen/src/IrTranslation.cpp | 22 +++++++-- CodeGen/src/IrUtils.cpp | 3 +- CodeGen/src/OptimizeConstProp.cpp | 45 +++++++++++++---- tests/IrLowering.test.cpp | 81 +++++++++++++++++++------------ 9 files changed, 192 insertions(+), 74 deletions(-) diff --git a/CodeGen/include/Luau/IrData.h b/CodeGen/include/Luau/IrData.h index 129536d10..1d9bbc73e 100644 --- a/CodeGen/include/Luau/IrData.h +++ b/CodeGen/include/Luau/IrData.h @@ -304,7 +304,11 @@ enum class IrCmd : uint8_t // Converts a double number to a vector with the value in X/Y/Z // A: double - NUM_TO_VECTOR, + NUM_TO_VEC, + + // Adds VECTOR type tag to a vector, preserving X/Y/Z components + // A: TValue + TAG_VECTOR, // Adjust stack top (L->top) to point at 'B' TValues *after* the specified register // This is used to return multiple values diff --git a/CodeGen/include/Luau/IrUtils.h b/CodeGen/include/Luau/IrUtils.h index 47ef505bb..0c8495e86 100644 --- a/CodeGen/include/Luau/IrUtils.h +++ b/CodeGen/include/Luau/IrUtils.h @@ -186,7 +186,8 @@ inline bool hasResult(IrCmd cmd) case IrCmd::UINT_TO_NUM: case IrCmd::NUM_TO_INT: case IrCmd::NUM_TO_UINT: - case IrCmd::NUM_TO_VECTOR: + case IrCmd::NUM_TO_VEC: + case IrCmd::TAG_VECTOR: case IrCmd::SUBSTITUTE: case IrCmd::INVOKE_FASTCALL: case IrCmd::BITAND_UINT: diff --git a/CodeGen/src/IrDump.cpp b/CodeGen/src/IrDump.cpp index de7a7fa48..9a1159534 100644 --- a/CodeGen/src/IrDump.cpp +++ b/CodeGen/src/IrDump.cpp @@ -205,8 +205,10 @@ const char* getCmdName(IrCmd cmd) return "NUM_TO_INT"; case IrCmd::NUM_TO_UINT: return "NUM_TO_UINT"; - case IrCmd::NUM_TO_VECTOR: - return "NUM_TO_VECTOR"; + case IrCmd::NUM_TO_VEC: + return "NUM_TO_VEC"; + case IrCmd::TAG_VECTOR: + 
return "TAG_VECTOR"; case IrCmd::ADJUST_STACK_TO_REG: return "ADJUST_STACK_TO_REG"; case IrCmd::ADJUST_STACK_TO_TOP: diff --git a/CodeGen/src/IrLoweringA64.cpp b/CodeGen/src/IrLoweringA64.cpp index 9d9df188d..6a5703d17 100644 --- a/CodeGen/src/IrLoweringA64.cpp +++ b/CodeGen/src/IrLoweringA64.cpp @@ -14,6 +14,8 @@ LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauCodeGenFixBufferLenCheckA64, false) LUAU_FASTFLAGVARIABLE(LuauCodeGenVectorA64, false) +LUAU_FASTFLAG(LuauCodegenVectorTag) + namespace Luau { namespace CodeGen @@ -678,9 +680,12 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) { build.fadd(inst.regA64, regOp(inst.a), regOp(inst.b)); - RegisterA64 tempw = regs.allocTemp(KindA64::w); - build.mov(tempw, LUA_TVECTOR); - build.ins_4s(inst.regA64, tempw, 3); + if (!FFlag::LuauCodegenVectorTag) + { + RegisterA64 tempw = regs.allocTemp(KindA64::w); + build.mov(tempw, LUA_TVECTOR); + build.ins_4s(inst.regA64, tempw, 3); + } } else { @@ -705,9 +710,12 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) { build.fsub(inst.regA64, regOp(inst.a), regOp(inst.b)); - RegisterA64 tempw = regs.allocTemp(KindA64::w); - build.mov(tempw, LUA_TVECTOR); - build.ins_4s(inst.regA64, tempw, 3); + if (!FFlag::LuauCodegenVectorTag) + { + RegisterA64 tempw = regs.allocTemp(KindA64::w); + build.mov(tempw, LUA_TVECTOR); + build.ins_4s(inst.regA64, tempw, 3); + } } else { @@ -732,9 +740,12 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) { build.fmul(inst.regA64, regOp(inst.a), regOp(inst.b)); - RegisterA64 tempw = regs.allocTemp(KindA64::w); - build.mov(tempw, LUA_TVECTOR); - build.ins_4s(inst.regA64, tempw, 3); + if (!FFlag::LuauCodegenVectorTag) + { + RegisterA64 tempw = regs.allocTemp(KindA64::w); + build.mov(tempw, LUA_TVECTOR); + build.ins_4s(inst.regA64, tempw, 3); + } } else { @@ -759,9 +770,12 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) { 
build.fdiv(inst.regA64, regOp(inst.a), regOp(inst.b)); - RegisterA64 tempw = regs.allocTemp(KindA64::w); - build.mov(tempw, LUA_TVECTOR); - build.ins_4s(inst.regA64, tempw, 3); + if (!FFlag::LuauCodegenVectorTag) + { + RegisterA64 tempw = regs.allocTemp(KindA64::w); + build.mov(tempw, LUA_TVECTOR); + build.ins_4s(inst.regA64, tempw, 3); + } } else { @@ -786,9 +800,12 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) { build.fneg(inst.regA64, regOp(inst.a)); - RegisterA64 tempw = regs.allocTemp(KindA64::w); - build.mov(tempw, LUA_TVECTOR); - build.ins_4s(inst.regA64, tempw, 3); + if (!FFlag::LuauCodegenVectorTag) + { + RegisterA64 tempw = regs.allocTemp(KindA64::w); + build.mov(tempw, LUA_TVECTOR); + build.ins_4s(inst.regA64, tempw, 3); + } } else { @@ -1156,7 +1173,7 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) build.fcvtzs(castReg(KindA64::x, inst.regA64), temp); break; } - case IrCmd::NUM_TO_VECTOR: + case IrCmd::NUM_TO_VEC: { inst.regA64 = regs.allocReg(KindA64::q, index); @@ -1167,6 +1184,23 @@ void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) build.fcvt(temps, tempd); build.dup_4s(inst.regA64, castReg(KindA64::q, temps), 0); + if (!FFlag::LuauCodegenVectorTag) + { + build.mov(tempw, LUA_TVECTOR); + build.ins_4s(inst.regA64, tempw, 3); + } + break; + } + case IrCmd::TAG_VECTOR: + { + inst.regA64 = regs.allocReuse(KindA64::q, index, {inst.a}); + + RegisterA64 reg = regOp(inst.a); + RegisterA64 tempw = regs.allocTemp(KindA64::w); + + if (inst.regA64 != reg) + build.mov(inst.regA64, reg); + build.mov(tempw, LUA_TVECTOR); build.ins_4s(inst.regA64, tempw, 3); break; diff --git a/CodeGen/src/IrLoweringX64.cpp b/CodeGen/src/IrLoweringX64.cpp index babfdf46a..c5188dc47 100644 --- a/CodeGen/src/IrLoweringX64.cpp +++ b/CodeGen/src/IrLoweringX64.cpp @@ -15,6 +15,8 @@ #include "lstate.h" #include "lgc.h" +LUAU_FASTFLAG(LuauCodegenVectorTag) + namespace Luau { namespace 
CodeGen @@ -608,7 +610,9 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) build.vandps(tmp1.reg, regOp(inst.a), vectorAndMaskOp()); build.vandps(tmp2.reg, regOp(inst.b), vectorAndMaskOp()); build.vaddps(inst.regX64, tmp1.reg, tmp2.reg); - build.vorps(inst.regX64, inst.regX64, vectorOrMaskOp()); + + if (!FFlag::LuauCodegenVectorTag) + build.vorps(inst.regX64, inst.regX64, vectorOrMaskOp()); break; } case IrCmd::SUB_VEC: @@ -622,7 +626,8 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) build.vandps(tmp1.reg, regOp(inst.a), vectorAndMaskOp()); build.vandps(tmp2.reg, regOp(inst.b), vectorAndMaskOp()); build.vsubps(inst.regX64, tmp1.reg, tmp2.reg); - build.vorps(inst.regX64, inst.regX64, vectorOrMaskOp()); + if (!FFlag::LuauCodegenVectorTag) + build.vorps(inst.regX64, inst.regX64, vectorOrMaskOp()); break; } case IrCmd::MUL_VEC: @@ -636,7 +641,8 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) build.vandps(tmp1.reg, regOp(inst.a), vectorAndMaskOp()); build.vandps(tmp2.reg, regOp(inst.b), vectorAndMaskOp()); build.vmulps(inst.regX64, tmp1.reg, tmp2.reg); - build.vorps(inst.regX64, inst.regX64, vectorOrMaskOp()); + if (!FFlag::LuauCodegenVectorTag) + build.vorps(inst.regX64, inst.regX64, vectorOrMaskOp()); break; } case IrCmd::DIV_VEC: @@ -650,7 +656,8 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) build.vandps(tmp1.reg, regOp(inst.a), vectorAndMaskOp()); build.vandps(tmp2.reg, regOp(inst.b), vectorAndMaskOp()); build.vdivps(inst.regX64, tmp1.reg, tmp2.reg); - build.vpinsrd(inst.regX64, inst.regX64, build.i32(LUA_TVECTOR), 3); + if (!FFlag::LuauCodegenVectorTag) + build.vpinsrd(inst.regX64, inst.regX64, build.i32(LUA_TVECTOR), 3); break; } case IrCmd::UNM_VEC: @@ -669,7 +676,8 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) build.vxorpd(inst.regX64, inst.regX64, build.f32x4(-0.0, -0.0, -0.0, 
-0.0)); } - build.vpinsrd(inst.regX64, inst.regX64, build.i32(LUA_TVECTOR), 3); + if (!FFlag::LuauCodegenVectorTag) + build.vpinsrd(inst.regX64, inst.regX64, build.i32(LUA_TVECTOR), 3); break; } case IrCmd::NOT_ANY: @@ -964,7 +972,7 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) build.vcvttsd2si(qwordReg(inst.regX64), memRegDoubleOp(inst.a)); break; - case IrCmd::NUM_TO_VECTOR: + case IrCmd::NUM_TO_VEC: inst.regX64 = regs.allocReg(SizeX64::xmmword, index); if (inst.a.kind == IrOpKind::Constant) @@ -974,15 +982,25 @@ void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next) static_assert(sizeof(asU32) == sizeof(value), "Expecting float to be 32-bit"); memcpy(&asU32, &value, sizeof(value)); - build.vmovaps(inst.regX64, build.u32x4(asU32, asU32, asU32, LUA_TVECTOR)); + if (FFlag::LuauCodegenVectorTag) + build.vmovaps(inst.regX64, build.u32x4(asU32, asU32, asU32, 0)); + else + build.vmovaps(inst.regX64, build.u32x4(asU32, asU32, asU32, LUA_TVECTOR)); } else { build.vcvtsd2ss(inst.regX64, inst.regX64, memRegDoubleOp(inst.a)); build.vpshufps(inst.regX64, inst.regX64, inst.regX64, 0b00'00'00'00); - build.vpinsrd(inst.regX64, inst.regX64, build.i32(LUA_TVECTOR), 3); + + if (!FFlag::LuauCodegenVectorTag) + build.vpinsrd(inst.regX64, inst.regX64, build.i32(LUA_TVECTOR), 3); } break; + case IrCmd::TAG_VECTOR: + inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {inst.a}); + + build.vpinsrd(inst.regX64, regOp(inst.a), build.i32(LUA_TVECTOR), 3); + break; case IrCmd::ADJUST_STACK_TO_REG: { ScopedRegX64 tmp{regs, SizeX64::qword}; diff --git a/CodeGen/src/IrTranslation.cpp b/CodeGen/src/IrTranslation.cpp index 44d0a264a..686d51301 100644 --- a/CodeGen/src/IrTranslation.cpp +++ b/CodeGen/src/IrTranslation.cpp @@ -14,6 +14,7 @@ LUAU_FASTFLAGVARIABLE(LuauCodegenLuData, false) LUAU_FASTFLAGVARIABLE(LuauCodegenVector, false) +LUAU_FASTFLAGVARIABLE(LuauCodegenVectorTag, false) namespace Luau { @@ -380,9 +381,12 @@ 
static void translateInstBinaryNumeric(IrBuilder& build, int ra, int rb, int rc, result = build.inst(IrCmd::DIV_VEC, vb, vc); break; default: - break; + CODEGEN_ASSERT(!"Unknown TM op"); } + if (FFlag::LuauCodegenVectorTag) + result = build.inst(IrCmd::TAG_VECTOR, result); + build.inst(IrCmd::STORE_TVALUE, build.vmReg(ra), result); return; } @@ -393,7 +397,7 @@ static void translateInstBinaryNumeric(IrBuilder& build, int ra, int rb, int rc, build.inst(IrCmd::CHECK_TAG, build.inst(IrCmd::LOAD_TAG, build.vmReg(rc)), build.constTag(LUA_TVECTOR), build.vmExit(pcpos)); - IrOp vb = build.inst(IrCmd::NUM_TO_VECTOR, loadDoubleOrConstant(build, opb)); + IrOp vb = build.inst(IrCmd::NUM_TO_VEC, loadDoubleOrConstant(build, opb)); IrOp vc = build.inst(IrCmd::LOAD_TVALUE, opc); IrOp result; @@ -406,9 +410,12 @@ static void translateInstBinaryNumeric(IrBuilder& build, int ra, int rb, int rc, result = build.inst(IrCmd::DIV_VEC, vb, vc); break; default: - break; + CODEGEN_ASSERT(!"Unknown TM op"); } + if (FFlag::LuauCodegenVectorTag) + result = build.inst(IrCmd::TAG_VECTOR, result); + build.inst(IrCmd::STORE_TVALUE, build.vmReg(ra), result); return; } @@ -420,7 +427,7 @@ static void translateInstBinaryNumeric(IrBuilder& build, int ra, int rb, int rc, build.inst(IrCmd::CHECK_TAG, build.inst(IrCmd::LOAD_TAG, build.vmReg(rc)), build.constTag(LUA_TNUMBER), build.vmExit(pcpos)); IrOp vb = build.inst(IrCmd::LOAD_TVALUE, opb); - IrOp vc = build.inst(IrCmd::NUM_TO_VECTOR, loadDoubleOrConstant(build, opc)); + IrOp vc = build.inst(IrCmd::NUM_TO_VEC, loadDoubleOrConstant(build, opc)); IrOp result; switch (tm) @@ -432,9 +439,12 @@ static void translateInstBinaryNumeric(IrBuilder& build, int ra, int rb, int rc, result = build.inst(IrCmd::DIV_VEC, vb, vc); break; default: - break; + CODEGEN_ASSERT(!"Unknown TM op"); } + if (FFlag::LuauCodegenVectorTag) + result = build.inst(IrCmd::TAG_VECTOR, result); + build.inst(IrCmd::STORE_TVALUE, build.vmReg(ra), result); return; } @@ -596,6 +606,8 @@ void 
translateInstMinus(IrBuilder& build, const Instruction* pc, int pcpos) IrOp vb = build.inst(IrCmd::LOAD_TVALUE, build.vmReg(rb)); IrOp va = build.inst(IrCmd::UNM_VEC, vb); + if (FFlag::LuauCodegenVectorTag) + va = build.inst(IrCmd::TAG_VECTOR, va); build.inst(IrCmd::STORE_TVALUE, build.vmReg(ra), va); return; } diff --git a/CodeGen/src/IrUtils.cpp b/CodeGen/src/IrUtils.cpp index b49e974f4..caa6b1788 100644 --- a/CodeGen/src/IrUtils.cpp +++ b/CodeGen/src/IrUtils.cpp @@ -106,7 +106,8 @@ IrValueKind getCmdValueKind(IrCmd cmd) case IrCmd::NUM_TO_INT: case IrCmd::NUM_TO_UINT: return IrValueKind::Int; - case IrCmd::NUM_TO_VECTOR: + case IrCmd::NUM_TO_VEC: + case IrCmd::TAG_VECTOR: return IrValueKind::Tvalue; case IrCmd::ADJUST_STACK_TO_REG: case IrCmd::ADJUST_STACK_TO_TOP: diff --git a/CodeGen/src/OptimizeConstProp.cpp b/CodeGen/src/OptimizeConstProp.cpp index 0c543572f..4214d0153 100644 --- a/CodeGen/src/OptimizeConstProp.cpp +++ b/CodeGen/src/OptimizeConstProp.cpp @@ -18,6 +18,7 @@ LUAU_FASTINTVARIABLE(LuauCodeGenMinLinearBlockPath, 3) LUAU_FASTINTVARIABLE(LuauCodeGenReuseSlotLimit, 64) LUAU_FASTFLAGVARIABLE(DebugLuauAbortingChecks, false) LUAU_FASTFLAG(LuauCodegenVector) +LUAU_FASTFLAG(LuauCodegenVectorTag) LUAU_DYNAMIC_FASTFLAGVARIABLE(LuauCodeGenCheckGcEffectFix, false) namespace Luau @@ -715,9 +716,17 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& { if (IrInst* arg = function.asInstOp(inst.b)) { - if (arg->cmd == IrCmd::ADD_VEC || arg->cmd == IrCmd::SUB_VEC || arg->cmd == IrCmd::MUL_VEC || arg->cmd == IrCmd::DIV_VEC || - arg->cmd == IrCmd::UNM_VEC) - tag = LUA_TVECTOR; + if (FFlag::LuauCodegenVectorTag) + { + if (arg->cmd == IrCmd::TAG_VECTOR) + tag = LUA_TVECTOR; + } + else + { + if (arg->cmd == IrCmd::ADD_VEC || arg->cmd == IrCmd::SUB_VEC || arg->cmd == IrCmd::MUL_VEC || arg->cmd == IrCmd::DIV_VEC || + arg->cmd == IrCmd::UNM_VEC) + tag = LUA_TVECTOR; + } } } @@ -1250,6 +1259,28 @@ static void constPropInInst(ConstPropState& 
state, IrBuilder& build, IrFunction& if (int(state.checkSlotMatchCache.size()) < FInt::LuauCodeGenReuseSlotLimit) state.checkSlotMatchCache.push_back(index); break; + + case IrCmd::ADD_VEC: + case IrCmd::SUB_VEC: + case IrCmd::MUL_VEC: + case IrCmd::DIV_VEC: + if (FFlag::LuauCodegenVectorTag) + { + if (IrInst* a = function.asInstOp(inst.a); a && a->cmd == IrCmd::TAG_VECTOR) + inst.a = a->a; + if (IrInst* b = function.asInstOp(inst.b); b && b->cmd == IrCmd::TAG_VECTOR) + inst.b = b->a; + } + break; + + case IrCmd::UNM_VEC: + if (FFlag::LuauCodegenVectorTag) + { + if (IrInst* a = function.asInstOp(inst.a); a && a->cmd == IrCmd::TAG_VECTOR) + inst.a = a->a; + } + break; + case IrCmd::CHECK_NODE_NO_NEXT: case IrCmd::CHECK_NODE_VALUE: case IrCmd::BARRIER_TABLE_BACK: @@ -1278,12 +1309,8 @@ static void constPropInInst(ConstPropState& state, IrBuilder& build, IrFunction& case IrCmd::GET_TYPE: case IrCmd::GET_TYPEOF: case IrCmd::FINDUPVAL: - case IrCmd::ADD_VEC: - case IrCmd::SUB_VEC: - case IrCmd::MUL_VEC: - case IrCmd::DIV_VEC: - case IrCmd::UNM_VEC: - case IrCmd::NUM_TO_VECTOR: + case IrCmd::NUM_TO_VEC: + case IrCmd::TAG_VECTOR: break; case IrCmd::DO_ARITH: diff --git a/tests/IrLowering.test.cpp b/tests/IrLowering.test.cpp index bdb7e38cf..13f44dca2 100644 --- a/tests/IrLowering.test.cpp +++ b/tests/IrLowering.test.cpp @@ -13,6 +13,7 @@ #include LUAU_FASTFLAG(LuauCodegenVector) +LUAU_FASTFLAG(LuauCodegenVectorTag) LUAU_FASTFLAG(LuauCodegenMathMemArgs) static std::string getCodegenAssembly(const char* source) @@ -65,6 +66,7 @@ TEST_SUITE_BEGIN("IrLowering"); TEST_CASE("VectorReciprocal") { ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true}; + ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true}; CHECK_EQ("\n" + getCodegenAssembly(R"( local function vecrcp(a: vector) @@ -79,10 +81,11 @@ end bb_2: JUMP bb_bytecode_1 bb_bytecode_1: - %6 = NUM_TO_VECTOR 1 + %6 = NUM_TO_VEC 1 %7 = LOAD_TVALUE R0 %8 = DIV_VEC %6, %7 - STORE_TVALUE R1, %8 + %9 = 
TAG_VECTOR %8 + STORE_TVALUE R1, %9 INTERRUPT 1u RETURN R1, 1i )"); @@ -127,6 +130,7 @@ end TEST_CASE("VectorAdd") { ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true}; + ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true}; CHECK_EQ("\n" + getCodegenAssembly(R"( local function vec3add(a: vector, b: vector) @@ -145,7 +149,8 @@ end %10 = LOAD_TVALUE R0 %11 = LOAD_TVALUE R1 %12 = ADD_VEC %10, %11 - STORE_TVALUE R2, %12 + %13 = TAG_VECTOR %12 + STORE_TVALUE R2, %13 INTERRUPT 1u RETURN R2, 1i )"); @@ -154,6 +159,7 @@ end TEST_CASE("VectorMinus") { ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true}; + ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true}; CHECK_EQ("\n" + getCodegenAssembly(R"( local function vec3minus(a: vector) @@ -170,7 +176,8 @@ end bb_bytecode_1: %6 = LOAD_TVALUE R0 %7 = UNM_VEC %6 - STORE_TVALUE R1, %7 + %8 = TAG_VECTOR %7 + STORE_TVALUE R1, %8 INTERRUPT 1u RETURN R1, 1i )"); @@ -179,6 +186,7 @@ end TEST_CASE("VectorSubMulDiv") { ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true}; + ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true}; CHECK_EQ("\n" + getCodegenAssembly(R"( local function vec3combo(a: vector, b: vector, c: vector, d: vector) @@ -199,13 +207,16 @@ end %14 = LOAD_TVALUE R0 %15 = LOAD_TVALUE R1 %16 = MUL_VEC %14, %15 - STORE_TVALUE R5, %16 - %22 = LOAD_TVALUE R2 - %23 = LOAD_TVALUE R3 - %24 = DIV_VEC %22, %23 - STORE_TVALUE R6, %24 - %32 = SUB_VEC %16, %24 - STORE_TVALUE R4, %32 + %17 = TAG_VECTOR %16 + STORE_TVALUE R5, %17 + %23 = LOAD_TVALUE R2 + %24 = LOAD_TVALUE R3 + %25 = DIV_VEC %23, %24 + %26 = TAG_VECTOR %25 + STORE_TVALUE R6, %26 + %34 = SUB_VEC %16, %25 + %35 = TAG_VECTOR %34 + STORE_TVALUE R4, %35 INTERRUPT 3u RETURN R4, 1i )"); @@ -214,6 +225,7 @@ end TEST_CASE("VectorMulDivMixed") { ScopedFastFlag luauCodegenVector{FFlag::LuauCodegenVector, true}; + ScopedFastFlag luauCodegenVectorTag{FFlag::LuauCodegenVectorTag, true}; 
CHECK_EQ("\n" + getCodegenAssembly(R"( local function vec3combo(a: vector, b: vector, c: vector, d: vector) @@ -232,29 +244,36 @@ end JUMP bb_bytecode_1 bb_bytecode_1: %12 = LOAD_TVALUE R0 - %13 = NUM_TO_VECTOR 2 + %13 = NUM_TO_VEC 2 %14 = MUL_VEC %12, %13 - STORE_TVALUE R7, %14 - %18 = LOAD_TVALUE R1 - %19 = NUM_TO_VECTOR 4 - %20 = DIV_VEC %18, %19 - STORE_TVALUE R8, %20 - %28 = ADD_VEC %14, %20 - STORE_TVALUE R6, %28 + %15 = TAG_VECTOR %14 + STORE_TVALUE R7, %15 + %19 = LOAD_TVALUE R1 + %20 = NUM_TO_VEC 4 + %21 = DIV_VEC %19, %20 + %22 = TAG_VECTOR %21 + STORE_TVALUE R8, %22 + %30 = ADD_VEC %14, %21 + %31 = TAG_VECTOR %30 + STORE_TVALUE R6, %31 STORE_DOUBLE R8, 0.5 STORE_TAG R8, tnumber - %37 = NUM_TO_VECTOR 0.5 - %38 = LOAD_TVALUE R2 - %39 = MUL_VEC %37, %38 - STORE_TVALUE R7, %39 - %47 = ADD_VEC %28, %39 - STORE_TVALUE R5, %47 - %51 = NUM_TO_VECTOR 40 - %52 = LOAD_TVALUE R3 - %53 = DIV_VEC %51, %52 - STORE_TVALUE R6, %53 - %61 = ADD_VEC %47, %53 - STORE_TVALUE R4, %61 + %40 = NUM_TO_VEC 0.5 + %41 = LOAD_TVALUE R2 + %42 = MUL_VEC %40, %41 + %43 = TAG_VECTOR %42 + STORE_TVALUE R7, %43 + %51 = ADD_VEC %30, %42 + %52 = TAG_VECTOR %51 + STORE_TVALUE R5, %52 + %56 = NUM_TO_VEC 40 + %57 = LOAD_TVALUE R3 + %58 = DIV_VEC %56, %57 + %59 = TAG_VECTOR %58 + STORE_TVALUE R6, %59 + %67 = ADD_VEC %51, %58 + %68 = TAG_VECTOR %67 + STORE_TVALUE R4, %68 INTERRUPT 8u RETURN R4, 1i )");