Skip to content

Commit

Permalink
More NEON MLA
Browse files Browse the repository at this point in the history
  • Loading branch information
hrydgard committed Dec 9, 2023
1 parent 55d4875 commit 08ce69d
Showing 1 changed file with 6 additions and 14 deletions.
20 changes: 6 additions & 14 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -525,26 +525,18 @@ bool DrawEngineCommon::TestBoundingBoxFast(const void *vdata, int vertexCount, u
const float *pos = verts + i * vertStride;
float32x4_t objpos = vld1q_f32(pos);
float32x4_t worldpos = vaddq_f32(
- vaddq_f32(
+ vmlaq_laneq_f32(
vmulq_laneq_f32(worldX, objpos, 0),
- vmulq_laneq_f32(worldY, objpos, 1)
- ),
- vaddq_f32(
- vmulq_laneq_f32(worldZ, objpos, 2),
- worldW
- )
+ worldY, objpos, 1),
+ vmlaq_laneq_f32(worldW, worldZ, objpos, 2)
);
// OK, now we check it against the four planes.
// This is really curiously similar to a matrix multiplication (well, it is one).
float32x4_t planeDist = vaddq_f32(
- vaddq_f32(
+ vmlaq_laneq_f32(
vmulq_laneq_f32(planeX, worldpos, 0),
- vmulq_laneq_f32(planeY, worldpos, 1)
- ),
- vaddq_f32(
- vmulq_laneq_f32(planeZ, worldpos, 2),
- planeW
- )
+ planeY, worldpos, 1),
+ vmlaq_laneq_f32(planeW, planeZ, worldpos, 2)
);
inside = vorrq_u32(inside, vcgeq_f32(planeDist, vdupq_n_f32(0.0f)));
}
Expand Down

0 comments on commit 08ce69d

Please sign in to comment.