Skip to content

Commit

Permalink
avcodec/ppc/fdctdsp: POWER LE support in ff_fdct_altivec() delete mac…
Browse files Browse the repository at this point in the history
…ros VEC_FMERGEH() and VEC_FMERGEL(), they were wrong

GCC had a bug in how it interpreted PPC intrinsics, which has been fixed in GCC 4.9.1. This bug led to
errors in two of our previous patches. We found this when we updated our GCC tools to 4.9.1 and read
the related information on the GCC website. We fix our previous errors in two separate commits.

Signed-off-by: Michael Niedermayer <[email protected]>
  • Loading branch information
raiy236 authored and michaelni committed Apr 27, 2015
1 parent 603c839 commit 8a03d42
Showing 1 changed file with 39 additions and 47 deletions.
86 changes: 39 additions & 47 deletions libavcodec/ppc/fdctdsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,6 @@ static const vector float fdctconsts[3] = {
#define LD_WA vec_splat(cnsts2, 2)
#define LD_WB vec_splat(cnsts2, 3)

#if HAVE_BIGENDIAN
#define VEC_FMERGEH(a, b) vec_mergeh(a, b)
#define VEC_FMERGEL(a, b) vec_mergel(a, b)
#else
#define VEC_FMERGEH(a, b) vec_mergel(b, a)
#define VEC_FMERGEL(a, b) vec_mergeh(b, a)
#endif

#define FDCTROW(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */ \
x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \
x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \
Expand Down Expand Up @@ -393,45 +385,45 @@ void ff_fdct_altivec(int16_t *block)
/* }}} */

/* 8x8 matrix transpose (vector float[8][2]) {{{ */
x0 = VEC_FMERGEL(b00, b20);
x1 = VEC_FMERGEH(b00, b20);
x2 = VEC_FMERGEL(b10, b30);
x3 = VEC_FMERGEH(b10, b30);

b00 = VEC_FMERGEH(x1, x3);
b10 = VEC_FMERGEL(x1, x3);
b20 = VEC_FMERGEH(x0, x2);
b30 = VEC_FMERGEL(x0, x2);

x4 = VEC_FMERGEL(b41, b61);
x5 = VEC_FMERGEH(b41, b61);
x6 = VEC_FMERGEL(b51, b71);
x7 = VEC_FMERGEH(b51, b71);

b41 = VEC_FMERGEH(x5, x7);
b51 = VEC_FMERGEL(x5, x7);
b61 = VEC_FMERGEH(x4, x6);
b71 = VEC_FMERGEL(x4, x6);

x0 = VEC_FMERGEL(b01, b21);
x1 = VEC_FMERGEH(b01, b21);
x2 = VEC_FMERGEL(b11, b31);
x3 = VEC_FMERGEH(b11, b31);

x4 = VEC_FMERGEL(b40, b60);
x5 = VEC_FMERGEH(b40, b60);
x6 = VEC_FMERGEL(b50, b70);
x7 = VEC_FMERGEH(b50, b70);

b40 = VEC_FMERGEH(x1, x3);
b50 = VEC_FMERGEL(x1, x3);
b60 = VEC_FMERGEH(x0, x2);
b70 = VEC_FMERGEL(x0, x2);

b01 = VEC_FMERGEH(x5, x7);
b11 = VEC_FMERGEL(x5, x7);
b21 = VEC_FMERGEH(x4, x6);
b31 = VEC_FMERGEL(x4, x6);
x0 = vec_mergel(b00, b20);
x1 = vec_mergeh(b00, b20);
x2 = vec_mergel(b10, b30);
x3 = vec_mergeh(b10, b30);

b00 = vec_mergeh(x1, x3);
b10 = vec_mergel(x1, x3);
b20 = vec_mergeh(x0, x2);
b30 = vec_mergel(x0, x2);

x4 = vec_mergel(b41, b61);
x5 = vec_mergeh(b41, b61);
x6 = vec_mergel(b51, b71);
x7 = vec_mergeh(b51, b71);

b41 = vec_mergeh(x5, x7);
b51 = vec_mergel(x5, x7);
b61 = vec_mergeh(x4, x6);
b71 = vec_mergel(x4, x6);

x0 = vec_mergel(b01, b21);
x1 = vec_mergeh(b01, b21);
x2 = vec_mergel(b11, b31);
x3 = vec_mergeh(b11, b31);

x4 = vec_mergel(b40, b60);
x5 = vec_mergeh(b40, b60);
x6 = vec_mergel(b50, b70);
x7 = vec_mergeh(b50, b70);

b40 = vec_mergeh(x1, x3);
b50 = vec_mergel(x1, x3);
b60 = vec_mergeh(x0, x2);
b70 = vec_mergel(x0, x2);

b01 = vec_mergeh(x5, x7);
b11 = vec_mergel(x5, x7);
b21 = vec_mergeh(x4, x6);
b31 = vec_mergel(x4, x6);
/* }}} */

FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70);
Expand Down

0 comments on commit 8a03d42

Please sign in to comment.