diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index c551eb6fbf86..a1c459f9a3ff 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -525,26 +525,18 @@ bool DrawEngineCommon::TestBoundingBoxFast(const void *vdata, int vertexCount, u const float *pos = verts + i * vertStride; float32x4_t objpos = vld1q_f32(pos); float32x4_t worldpos = vaddq_f32( - vaddq_f32( + vmlaq_laneq_f32( vmulq_laneq_f32(worldX, objpos, 0), - vmulq_laneq_f32(worldY, objpos, 1) - ), - vaddq_f32( - vmulq_laneq_f32(worldZ, objpos, 2), - worldW - ) + worldY, objpos, 1), + vmlaq_laneq_f32(worldW, worldZ, objpos, 2) ); // OK, now we check it against the four planes. // This is really curiously similar to a matrix multiplication (well, it is one). float32x4_t planeDist = vaddq_f32( - vaddq_f32( + vmlaq_laneq_f32( vmulq_laneq_f32(planeX, worldpos, 0), - vmulq_laneq_f32(planeY, worldpos, 1) - ), - vaddq_f32( - vmulq_laneq_f32(planeZ, worldpos, 2), - planeW - ) + planeY, worldpos, 1), + vmlaq_laneq_f32(planeW, planeZ, worldpos, 2) ); inside = vorrq_u32(inside, vcgeq_f32(planeDist, vdupq_n_f32(0.0f))); }