/*==================================================================================== EVS Codec 3GPP TS26.442 Jun 30, 2015. Version CR 26.442-0010 ====================================================================================*/ #include #include "prot_fx.h" #include "basop_util.h" #include "rom_basop_util.h" #include "options.h" #include "stl.h" /** * \brief Profiling / Precision results * * Profiling / Precision of complex valued FFTs: BASOP_cfft() * * WOPS BASOP Precision BASOP * FFT5 87 16.96 * FFT8 108 17.04 * FFT10 194 16.70 * FFT15 354 16.97 * FFT16 288 16.62 * FFT20 368 16.06 * FFT30 828 16.80 * FFT32 752 15.45 (cplx mult mit 3 mult und 3 add) * FFT32 824 16.07 (cplx mult mit 4 mult und 2 add) * FFT64 ( 8x 8) 3.129 15.16 * FFT80 (10x 8) 4.385 15.55 * FFT100 (20x 5) 6.518 15.65 * FFT120 (15x 8) 7.029 15.38 * FFT128 (16x 8) 6.777 15.28 * FFT160 (20x 8) 9.033 14.95 * FFT240 (30x 8) 14.961 15.49 * FFT256 (32x 8) 14.905 14.61 (cplx mult mit 3 mult und 3 add) * FFT256 (32x 8) 15.265 15.04 (cplx mult mit 4 mult und 2 add) * FFT320 (20x16) 21.517 15.21 * * * Profiling / Precision of real valued FFTs / iFFTs: BASOP_rfft() * * WOPS BASOP Precision BASOP * rFFT40 955 15.68 * rFFT64 1635 16.17 * * irFFT40 1116 15.36 * irFFT64 1759 15.18 * */ #define Mpy_32_xx Mpy_32_16_1 #define FFTC(x) WORD322WORD16((Word32)x) #define C31 (FFTC(0x91261468)) /* FL2WORD32( -0.86602540) -sqrt(3)/2 */ #define C51 (FFTC(0x79bc3854)) /* FL2WORD32( 0.95105652) */ #define C52 (FFTC(0x9d839db0)) /* FL2WORD32(-1.53884180/2) */ #define C53 (FFTC(0xd18053ce)) /* FL2WORD32(-0.36327126) */ #define C54 (FFTC(0x478dde64)) /* FL2WORD32( 0.55901699) */ #define C55 (FFTC(0xb0000001)) /* FL2WORD32(-1.25/2) */ #define C81 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1) */ #define C82 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) */ #define C161 (FFTC(0x5a82799a)) /* FL2WORD32( 7.071067811865475e-1) INV_SQRT2 */ #define C162 (FFTC(0xa57d8666)) /* FL2WORD32(-7.071067811865475e-1) -INV_SQRT2 */ #define C163 (FFTC(0x7641af3d)) /* FL2WORD32( 9.238795325112867e-1) COS_PI_DIV8 */ #define C164 (FFTC(0x89be50c3)) /* FL2WORD32(-9.238795325112867e-1) -COS_PI_DIV8 */ #define C165 (FFTC(0x30fbc54d)) /* FL2WORD32( 3.826834323650898e-1) COS_3PI_DIV8 */ #define C166 (FFTC(0xcf043ab3)) /* FL2WORD32(-3.826834323650898e-1) -COS_3PI_DIV8 */ #define cplxMpy3_0(a,b,c,d) as = L_shr(a,1); \ bs = L_shr(b,1); \ a = L_sub(Mpy_32_xx(as,c),Mpy_32_xx(bs,d)); \ b = L_add(Mpy_32_xx(as,d),Mpy_32_xx(bs,c)); #define cplxMpy4_4_0(re,im,a,b,c,d) re = L_shr(L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)),SCALEFACTOR60-SCALEFACTOR15); \ im = L_shr(L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)),SCALEFACTOR60-SCALEFACTOR15); #define cplxMpy4_4_1(re,im,a,b) re = L_shr(a,SCALEFACTOR60-SCALEFACTOR15); \ im = L_shr(b,SCALEFACTOR60-SCALEFACTOR15); #define cplxMpy4_8_0(re,im,a,b,c,d) re = L_shr(L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)),1); \ im = L_shr(L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)),1); #define cplxMpy4_8_1(re,im,a,b) re = L_shr(a,1); \ im = L_shr(b,1); /* re = a*c - b*d im = a*d + b*c */ #if (SCALEFACTOR20 == SCALEFACTOR10) #define cplxMpy4_10_0(re,im,a,b,c,d) re = L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)); move32(); \ im = L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)); move32(); #define cplxMpy4_10_1(re,im,a,b) re = (a); move32(); \ im = (b); move32(); #else #define cplxMpy4_10_0(re,im,a,b,c,d) re = L_shr(L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)),SCALEFACTOR20-SCALEFACTOR10); move32(); \ im = L_shr(L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)),SCALEFACTOR20-SCALEFACTOR10); move32(); #define cplxMpy4_10_1(re,im,a,b) re = L_shr(a,SCALEFACTOR20-SCALEFACTOR10); move32(); \ im = L_shr(b,SCALEFACTOR20-SCALEFACTOR10); move32(); #endif #if (SCALEFACTOR20 == SCALEFACTOR30) #define cplxMpy4_20_30_0(re,im,a,b,c,d) re = L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)); move32(); \ im = L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)); move32(); #define cplxMpy4_20_30_1(re,im,a,b) re = (a); move32(); \ im = (b); move32(); #else #define cplxMpy4_20_30_0(re,im,a,b,c,d) re = L_shr(L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)),SCALEFACTOR20-SCALEFACTOR30); move32(); \ im = L_shr(L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)),SCALEFACTOR20-SCALEFACTOR30); move32(); #define cplxMpy4_20_30_1(re,im,a,b) re = L_shr(a,SCALEFACTOR20-SCALEFACTOR30); move32(); \ im = L_shr(b,SCALEFACTOR20-SCALEFACTOR30); move32(); #endif #define cplxMpy4_12_0(re,im,a,b,c,d) re = L_shr(L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)),SCALEFACTOR16-SCALEFACTOR12); move32(); \ im = L_shr(L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)),SCALEFACTOR16-SCALEFACTOR12); move32(); #define cplxMpy4_12_1(re,im,a,b) re = L_shr(a,SCALEFACTOR16-SCALEFACTOR12); move32(); \ im = L_shr(b,SCALEFACTOR16-SCALEFACTOR12); move32(); #define cplxMpy4_16_0(re,im,a,b,c,d) re = L_shr(L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)),SCALEFACTOR16); move32(); \ im = L_shr(L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)),SCALEFACTOR16); move32(); #define cplxMpy4_16_1(re,im,a,b) re = L_shr(a,SCALEFACTOR16); move32(); \ im = L_shr(b,SCALEFACTOR16); move32(); #define cplxMpy4_5_0(re,im,a,b,c,d) re = L_shr(L_sub(Mpy_32_xx(a,c),Mpy_32_xx(b,d)),SCALEFACTORN2); \ im = L_shr(L_add(Mpy_32_xx(a,d),Mpy_32_xx(b,c)),SCALEFACTORN2); #define cplxMpy4_5_1(re,im,a,b) re = L_shr(a,SCALEFACTORN2); \ im = L_shr(b,SCALEFACTORN2); /** * \brief Function performs a complex 5-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR5 bits. * * WOPS with 32x16 bit multiplications: 88 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ static void fft5(Word32 *re, Word32 *im, Word16 s) { Word32 x0,x1,x2,x3,x4; Word32 r1,r2,r3,r4; Word32 s1,s2,s3,s4; Word32 t; /* */ /* real part */ x0 = L_shr(re[s*0],SCALEFACTOR5); x1 = L_shr(re[s*1],SCALEFACTOR5); x2 = L_shr(re[s*2],SCALEFACTOR5); x3 = L_shr(re[s*3],SCALEFACTOR5); x4 = L_shr(re[s*4],SCALEFACTOR5); r1 = L_add(x1,x4); r4 = L_sub(x1,x4); r3 = L_add(x2,x3); r2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); re[0] = L_add(x0,r1); move32(); /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of the values as fracts */ r1 = L_add(re[0],(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx(L_add(r4,r2),C51); /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of the values as fracts */ r4 = L_add(t,L_shl(Mpy_32_xx(r4, C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ x0 = L_shr(im[s*0],SCALEFACTOR5); x1 = L_shr(im[s*1],SCALEFACTOR5); x2 = L_shr(im[s*2],SCALEFACTOR5); x3 = L_shr(im[s*3],SCALEFACTOR5); x4 = L_shr(im[s*4],SCALEFACTOR5); s1 = L_add(x1,x4); s4 = L_sub(x1,x4); s3 = L_add(x2,x3); s2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); im[0] = L_add(x0,s1); move32(); /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of the values as fracts */ s1 = L_add(im[0],L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of the values as fracts */ s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ re[s*1] = L_add(r1,s2); move32(); re[s*4] = L_sub(r1,s2); move32(); re[s*2] = L_sub(r3,s4); move32(); re[s*3] = L_add(r3,s4); move32(); im[s*1] = L_sub(s1,r2); move32(); im[s*4] = L_add(s1,r2); move32(); im[s*2] = L_add(s3,r4); move32(); im[s*3] = L_sub(s3,r4); move32(); /* */ } /** * \brief Function performs a complex 8-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR8 bits. * * WOPS with 32x16 bit multiplications: 108 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ static void fft8(Word32 *re, Word32 *im, Word16 s) { Word32 x00,x01,x02,x03,x04,x05,x06,x07; Word32 x08,x09,x10,x11,x12,x13,x14,x15; Word32 t00,t01,t02,t03,t04,t05,t06,t07; Word32 t08,t09,t10,t11,t12,t13,t14,t15; Word32 s00,s01,s02,s03,s04,s05,s06,s07; Word32 s08,s09,s10,s11,s12,s13,s14,s15; /* Pre-additions */ x00 = L_shr(re[s*0],SCALEFACTOR8); x01 = L_shr(im[s*0],SCALEFACTOR8); x02 = L_shr(re[s*1],SCALEFACTOR8); x03 = L_shr(im[s*1],SCALEFACTOR8); x04 = L_shr(re[s*2],SCALEFACTOR8); x05 = L_shr(im[s*2],SCALEFACTOR8); x06 = L_shr(re[s*3],SCALEFACTOR8); x07 = L_shr(im[s*3],SCALEFACTOR8); x08 = L_shr(re[s*4],SCALEFACTOR8); x09 = L_shr(im[s*4],SCALEFACTOR8); x10 = L_shr(re[s*5],SCALEFACTOR8); x11 = L_shr(im[s*5],SCALEFACTOR8); x12 = L_shr(re[s*6],SCALEFACTOR8); x13 = L_shr(im[s*6],SCALEFACTOR8); x14 = L_shr(re[s*7],SCALEFACTOR8); x15 = L_shr(im[s*7],SCALEFACTOR8); t00 = L_add(x00,x08); t02 = L_sub(x00,x08); t01 = L_add(x01,x09); t03 = L_sub(x01,x09); t04 = L_add(x02,x10); t06 = L_sub(x02,x10); t05 = L_add(x03,x11); t07 = L_sub(x03,x11); t08 = L_add(x04,x12); t10 = L_sub(x04,x12); t09 = L_add(x05,x13); t11 = L_sub(x05,x13); t12 = L_add(x06,x14); t14 = L_sub(x06,x14); t13 = L_add(x07,x15); t15 = L_sub(x07,x15); /* Pre-additions and core multiplications */ s00 = L_add(t00,t08); s04 = L_sub(t00,t08); s01 = L_add(t01,t09); s05 = L_sub(t01,t09); s08 = L_sub(t02,t11); s10 = L_add(t02,t11); s09 = L_add(t03,t10); s11 = L_sub(t03,t10); s02 = L_add(t04,t12); s07 = L_sub(t04,t12); s03 = L_add(t05,t13); s06 = L_sub(t13,t05); t01 = L_add(t06,t14); t02 = L_sub(t06,t14); t00 = L_add(t07,t15); t03 = L_sub(t07,t15); s12 = Mpy_32_xx(L_add(t00,t02),C81); s14 = Mpy_32_xx(L_sub(t00,t02),C81); s13 = Mpy_32_xx(L_sub(t03,t01),C81); s15 = Mpy_32_xx(L_add(t01,t03),C82); /* Post-additions */ re[s*0] = L_add(s00,s02); move32(); re[s*4] = L_sub(s00,s02); move32(); im[s*0] = L_add(s01,s03); move32(); im[s*4] = L_sub(s01,s03); move32(); re[s*2] = L_sub(s04,s06); move32(); re[s*6] = L_add(s04,s06); move32(); im[s*2] = L_sub(s05,s07); move32(); im[s*6] = L_add(s05,s07); move32(); re[s*3] = L_add(s08,s14); move32(); re[s*7] = L_sub(s08,s14); move32(); im[s*3] = L_add(s09,s15); move32(); im[s*7] = L_sub(s09,s15); move32(); re[s*1] = L_add(s10,s12); move32(); re[s*5] = L_sub(s10,s12); move32(); im[s*1] = L_add(s11,s13); move32(); im[s*5] = L_sub(s11,s13); move32(); } /** * \brief Function performs a complex 10-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR10 bits. * * WOPS with 32x16 bit multiplications: 196 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ static void fft10(Word32 *re, Word32 *im, Word16 s) { Word32 t; Word32 x0,x1,x2,x3,x4; Word32 r1,r2,r3,r4; Word32 s1,s2,s3,s4; Word32 y00,y01,y02,y03,y04,y05,y06,y07,y08,y09; Word32 y10,y11,y12,y13,y14,y15,y16,y17,y18,y19; /* 2 fft5 stages */ /* real part */ x0 = L_shr(re[s*0],SCALEFACTOR10); x1 = L_shr(re[s*2],SCALEFACTOR10); x2 = L_shr(re[s*4],SCALEFACTOR10); x3 = L_shr(re[s*6],SCALEFACTOR10); x4 = L_shr(re[s*8],SCALEFACTOR10); r1 = L_add(x3,x2); r4 = L_sub(x3,x2); r3 = L_add(x1,x4); r2 = L_sub(x1,x4); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y00 = L_add(x0,r1); r1 = L_add(y00,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4, C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ x0 = L_shr(im[s*0],SCALEFACTOR10); x1 = L_shr(im[s*2],SCALEFACTOR10); x2 = L_shr(im[s*4],SCALEFACTOR10); x3 = L_shr(im[s*6],SCALEFACTOR10); x4 = L_shr(im[s*8],SCALEFACTOR10); s1 = L_add(x3,x2); s4 = L_sub(x3,x2); s3 = L_add(x1,x4); s2 = L_sub(x1,x4); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y01 = L_add(x0,s1); s1 = L_add(y01,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y04 = L_add(r1,s2); y16 = L_sub(r1,s2); y08 = L_sub(r3,s4); y12 = L_add(r3,s4); y05 = L_sub(s1,r2); y17 = L_add(s1,r2); y09 = L_add(s3,r4); y13 = L_sub(s3,r4); /* real part */ x0 = L_shr(re[s*5],SCALEFACTOR10); x1 = L_shr(re[s*1],SCALEFACTOR10); x2 = L_shr(re[s*3],SCALEFACTOR10); x3 = L_shr(re[s*7],SCALEFACTOR10); x4 = L_shr(re[s*9],SCALEFACTOR10); r1 = L_add(x1,x4); r4 = L_sub(x1,x4); r3 = L_add(x3,x2); r2 = L_sub(x3,x2); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y02 = L_add(x0,r1); r1 = L_add(y02,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4, C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ x0 = L_shr(im[s*5],SCALEFACTOR10); x1 = L_shr(im[s*1],SCALEFACTOR10); x2 = L_shr(im[s*3],SCALEFACTOR10); x3 = L_shr(im[s*7],SCALEFACTOR10); x4 = L_shr(im[s*9],SCALEFACTOR10); s1 = L_add(x1,x4); s4 = L_sub(x1,x4); s3 = L_add(x3,x2); s2 = L_sub(x3,x2); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y03 = L_add(x0,s1); s1 = L_add(y03,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y06 = L_add(r1,s2); y18 = L_sub(r1,s2); y10 = L_sub(r3,s4); y14 = L_add(r3,s4); y07 = L_sub(s1,r2); y19 = L_add(s1,r2); y11 = L_add(s3,r4); y15 = L_sub(s3,r4); /* 5 fft2 stages */ re[s*0] = L_add(y00,y02); move32(); im[s*0] = L_add(y01,y03); move32(); re[s*5] = L_sub(y00,y02); move32(); im[s*5] = L_sub(y01,y03); move32(); re[s*2] = L_add(y04,y06); move32(); im[s*2] = L_add(y05,y07); move32(); re[s*7] = L_sub(y04,y06); move32(); im[s*7] = L_sub(y05,y07); move32(); re[s*4] = L_add(y08,y10); move32(); im[s*4] = L_add(y09,y11); move32(); re[s*9] = L_sub(y08,y10); move32(); im[s*9] = L_sub(y09,y11); move32(); re[s*6] = L_add(y12,y14); move32(); im[s*6] = L_add(y13,y15); move32(); re[s*1] = L_sub(y12,y14); move32(); im[s*1] = L_sub(y13,y15); move32(); re[s*8] = L_add(y16,y18); move32(); im[s*8] = L_add(y17,y19); move32(); re[s*3] = L_sub(y16,y18); move32(); im[s*3] = L_sub(y17,y19); move32(); } /** * \brief Function performs a complex 15-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR15 bits. * * WOPS with 32x16 bit multiplications: 354 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ static void fft15(Word32 *re, Word32 *im, Word16 s) { Word32 t; Word32 r1,r2,r3,r4; Word32 s1,s2,s3,s4; Word32 x00,x01,x02,x03,x04,x05,x06,x07,x08,x09; Word32 x10,x11,x12,x13,x14,x15,x16,x17,x18,x19; Word32 x20,x21,x22,x23,x24,x25,x26,x27,x28,x29; Word32 y00,y01,y02,y03,y04,y05,y06,y07,y08,y09; Word32 y10,y11,y12,y13,y14,y15,y16,y17,y18,y19; Word32 y20,y21,y22,y23,y24,y25,y26,y27,y28,y29; x00 = L_shr(re[s* 0],SCALEFACTOR15); x01 = L_shr(im[s* 0],SCALEFACTOR15); x02 = L_shr(re[s* 3],SCALEFACTOR15); x03 = L_shr(im[s* 3],SCALEFACTOR15); x04 = L_shr(re[s* 6],SCALEFACTOR15); x05 = L_shr(im[s* 6],SCALEFACTOR15); x06 = L_shr(re[s* 9],SCALEFACTOR15); x07 = L_shr(im[s* 9],SCALEFACTOR15); x08 = L_shr(re[s*12],SCALEFACTOR15); x09 = L_shr(im[s*12],SCALEFACTOR15); x10 = L_shr(re[s* 5],SCALEFACTOR15); x11 = L_shr(im[s* 5],SCALEFACTOR15); x12 = L_shr(re[s* 8],SCALEFACTOR15); x13 = L_shr(im[s* 8],SCALEFACTOR15); x14 = L_shr(re[s*11],SCALEFACTOR15); x15 = L_shr(im[s*11],SCALEFACTOR15); x16 = L_shr(re[s*14],SCALEFACTOR15); x17 = L_shr(im[s*14],SCALEFACTOR15); x18 = L_shr(re[s* 2],SCALEFACTOR15); x19 = L_shr(im[s* 2],SCALEFACTOR15); x20 = L_shr(re[s*10],SCALEFACTOR15); x21 = L_shr(im[s*10],SCALEFACTOR15); x22 = L_shr(re[s*13],SCALEFACTOR15); x23 = L_shr(im[s*13],SCALEFACTOR15); x24 = L_shr(re[s* 1],SCALEFACTOR15); x25 = L_shr(im[s* 1],SCALEFACTOR15); x26 = L_shr(re[s* 4],SCALEFACTOR15); x27 = L_shr(im[s* 4],SCALEFACTOR15); x28 = L_shr(re[s* 7],SCALEFACTOR15); x29 = L_shr(im[s* 7],SCALEFACTOR15); /* 1. FFT5 stage */ /* real part */ r1 = L_add(x02,x08); r4 = L_sub(x02,x08); r3 = L_add(x04,x06); r2 = L_sub(x04,x06); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y00 = L_add(x00,r1); r1 = L_add(y00,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4,C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ s1 = L_add(x03,x09); s4 = L_sub(x03,x09); s3 = L_add(x05,x07); s2 = L_sub(x05,x07); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y01 = L_add(x01,s1); s1 = L_add(y01,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y02 = L_add(r1,s2); y08 = L_sub(r1,s2); y04 = L_sub(r3,s4); y06 = L_add(r3,s4); y03 = L_sub(s1,r2); y09 = L_add(s1,r2); y05 = L_add(s3,r4); y07 = L_sub(s3,r4); /* 2. FFT5 stage */ /* real part */ r1 = L_add(x12,x18); r4 = L_sub(x12,x18); r3 = L_add(x14,x16); r2 = L_sub(x14,x16); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y10 = L_add(x10,r1); r1 = L_add(y10,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4,C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ s1 = L_add(x13,x19); s4 = L_sub(x13,x19); s3 = L_add(x15,x17); s2 = L_sub(x15,x17); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y11 = L_add(x11,s1); s1 = L_add(y11,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y12 = L_add(r1,s2); y18 = L_sub(r1,s2); y14 = L_sub(r3,s4); y16 = L_add(r3,s4); y13 = L_sub(s1,r2); y19 = L_add(s1,r2); y15 = L_add(s3,r4); y17 = L_sub(s3,r4); /* 3. FFT5 stage */ /* real part */ r1 = L_add(x22,x28); r4 = L_sub(x22,x28); r3 = L_add(x24,x26); r2 = L_sub(x24,x26); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y20 = L_add(x20,r1); r1 = L_add(y20,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4,C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ s1 = L_add(x23,x29); s4 = L_sub(x23,x29); s3 = L_add(x25,x27); s2 = L_sub(x25,x27); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y21 = L_add(x21,s1); s1 = L_add(y21,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y22 = L_add(r1,s2); y28 = L_sub(r1,s2); y24 = L_sub(r3,s4); y26 = L_add(r3,s4); y23 = L_sub(s1,r2); y29 = L_add(s1,r2); y25 = L_add(s3,r4); y27 = L_sub(s3,r4); /* 1. FFT3 stage */ /* real part */ r1 = L_add(y10,y20); r2 = Mpy_32_xx(L_sub(y10,y20),C31); re[s*0] = L_add(y00,r1); move32(); r1 = L_sub(y00,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y11,y21); s2 = Mpy_32_xx(L_sub(y11,y21),C31); im[s*0] = L_add(y01,s1); move32(); s1 = L_sub(y01,L_shr(s1,1)); /* combination */ re[s*10] = L_sub(r1,s2); move32(); re[s* 5] = L_add(r1,s2); move32(); im[s*10] = L_add(s1,r2); move32(); im[s* 5] = L_sub(s1,r2); move32(); /* 2. FFT3 stage */ /* real part */ r1 = L_add(y12,y22); r2 = Mpy_32_xx(L_sub(y12,y22),C31); re[s*6] = L_add(y02,r1); move32(); r1 = L_sub(y02,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y13,y23); s2 = Mpy_32_xx(L_sub(y13,y23),C31); im[s*6] = L_add(y03,s1); move32(); s1 = L_sub(y03,L_shr(s1,1)); /* combination */ re[s* 1] = L_sub(r1,s2); move32(); re[s*11] = L_add(r1,s2); move32(); im[s* 1] = L_add(s1,r2); move32(); im[s*11] = L_sub(s1,r2); move32(); /* 3. FFT3 stage */ /* real part */ r1 = L_add(y14,y24); r2 = Mpy_32_xx(L_sub(y14,y24),C31); re[s*12] = L_add(y04,r1); move32(); r1 = L_sub(y04,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y15,y25); s2 = Mpy_32_xx(L_sub(y15,y25),C31); im[s*12] = L_add(y05,s1); move32(); s1 = L_sub(y05,L_shr(s1,1)); /* combination */ re[s* 7] = L_sub(r1,s2); move32(); re[s* 2] = L_add(r1,s2); move32(); im[s* 7] = L_add(s1,r2); move32(); im[s* 2] = L_sub(s1,r2); move32(); /* 4. FFT3 stage */ /* real part */ r1 = L_add(y16,y26); r2 = Mpy_32_xx(L_sub(y16,y26),C31); re[s*3] = L_add(y06,r1); move32(); r1 = L_sub(y06,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y17,y27); s2 = Mpy_32_xx(L_sub(y17,y27),C31); im[s*3] = L_add(y07,s1); move32(); s1 = L_sub(y07,L_shr(s1,1)); /* combination */ re[s*13] = L_sub(r1,s2); move32(); re[s* 8] = L_add(r1,s2); move32(); im[s*13] = L_add(s1,r2); move32(); im[s* 8] = L_sub(s1,r2); move32(); /* 5. FFT3 stage */ /* real part */ r1 = L_add(y18,y28); r2 = Mpy_32_xx(L_sub(y18,y28),C31); re[s*9] = L_add(y08,r1); move32(); r1 = L_sub(y08,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y19,y29); s2 = Mpy_32_xx(L_sub(y19,y29),C31); im[s*9] = L_add(y09,s1); move32(); s1 = L_sub(y09,L_shr(s1,1)); /* combination */ re[s* 4] = L_sub(r1,s2); move32(); re[s*14] = L_add(r1,s2); move32(); im[s* 4] = L_add(s1,r2); move32(); im[s*14] = L_sub(s1,r2); move32(); } /** * \brief Function performs a complex 16-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR16 bits. * * WOPS with 32x16 bit multiplications (scale on ): 288 cycles * WOPS with 32x16 bit multiplications (scale off): 256 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ void fft16(Word32 *re, Word32 *im, Word16 s, Word16 bScale) { Word32 x0,x1,x2,x3,x4,x5,x6,x7; Word32 t0,t1,t2,t3,t4,t5,t6,t7; Word32 y00,y01,y02,y03,y04,y05,y06,y07; Word32 y08,y09,y10,y11,y12,y13,y14,y15; Word32 y16,y17,y18,y19,y20,y21,y22,y23; Word32 y24,y25,y26,y27,y28,y29,y30,y31; IF (bScale) { x0 = L_shr(re[s* 0],SCALEFACTOR16); x1 = L_shr(im[s* 0],SCALEFACTOR16); x2 = L_shr(re[s* 4],SCALEFACTOR16); x3 = L_shr(im[s* 4],SCALEFACTOR16); x4 = L_shr(re[s* 8],SCALEFACTOR16); x5 = L_shr(im[s* 8],SCALEFACTOR16); x6 = L_shr(re[s*12],SCALEFACTOR16); x7 = L_shr(im[s*12],SCALEFACTOR16); /* Pre-additions */ t0 = L_add(x0,x4); t2 = L_sub(x0,x4); t1 = L_add(x1,x5); t3 = L_sub(x1,x5); t4 = L_add(x2,x6); t7 = L_sub(x2,x6); t5 = L_add(x7,x3); t6 = L_sub(x7,x3); /* Post-additions */ y00 = L_add(t0,t4); y01 = L_add(t1,t5); y02 = L_sub(t2,t6); y03 = L_sub(t3,t7); y04 = L_sub(t0,t4); y05 = L_sub(t1,t5); y06 = L_add(t2,t6); y07 = L_add(t3,t7); x0 = L_shr(re[s* 1],SCALEFACTOR16); x1 = L_shr(im[s* 1],SCALEFACTOR16); x2 = L_shr(re[s* 5],SCALEFACTOR16); x3 = L_shr(im[s* 5],SCALEFACTOR16); x4 = L_shr(re[s* 9],SCALEFACTOR16); x5 = L_shr(im[s* 9],SCALEFACTOR16); x6 = L_shr(re[s*13],SCALEFACTOR16); x7 = L_shr(im[s*13],SCALEFACTOR16); /* Pre-additions */ t0 = L_add(x0,x4); t2 = L_sub(x0,x4); t1 = L_add(x1,x5); t3 = L_sub(x1,x5); t4 = L_add(x2,x6); t7 = L_sub(x2,x6); t5 = L_add(x7,x3); t6 = L_sub(x7,x3); /* Post-additions */ y08 = L_add(t0,t4); y09 = L_add(t1,t5); y10 = L_sub(t2,t6); y11 = L_sub(t3,t7); y12 = L_sub(t0,t4); y13 = L_sub(t1,t5); y14 = L_add(t2,t6); y15 = L_add(t3,t7); x0 = L_shr(re[s* 2],SCALEFACTOR16); x1 = L_shr(im[s* 2],SCALEFACTOR16); x2 = L_shr(re[s* 6],SCALEFACTOR16); x3 = L_shr(im[s* 6],SCALEFACTOR16); x4 = L_shr(re[s*10],SCALEFACTOR16); x5 = L_shr(im[s*10],SCALEFACTOR16); x6 = L_shr(re[s*14],SCALEFACTOR16); x7 = L_shr(im[s*14],SCALEFACTOR16); /* Pre-additions */ t0 = L_add(x0,x4); t2 = L_sub(x0,x4); t1 = L_add(x1,x5); t3 = L_sub(x1,x5); t4 = L_add(x2,x6); t7 = L_sub(x2,x6); t5 = L_add(x7,x3); t6 = L_sub(x7,x3); /* Post-additions */ y16 = L_add(t0,t4); y17 = L_add(t1,t5); y18 = L_sub(t2,t6); y19 = L_sub(t3,t7); y20 = L_sub(t1,t5); y21 = L_sub(t4,t0); y22 = L_add(t2,t6); y23 = L_add(t3,t7); x0 = L_shr(re[s* 3],SCALEFACTOR16); x1 = L_shr(im[s* 3],SCALEFACTOR16); x2 = L_shr(re[s* 7],SCALEFACTOR16); x3 = L_shr(im[s* 7],SCALEFACTOR16); x4 = L_shr(re[s*11],SCALEFACTOR16); x5 = L_shr(im[s*11],SCALEFACTOR16); x6 = L_shr(re[s*15],SCALEFACTOR16); x7 = L_shr(im[s*15],SCALEFACTOR16); /* Pre-additions */ t0 = L_add(x0,x4); t2 = L_sub(x0,x4); t1 = L_add(x1,x5); t3 = L_sub(x1,x5); t4 = L_add(x2,x6); t7 = L_sub(x2,x6); t5 = L_add(x7,x3); t6 = L_sub(x7,x3); /* Post-additions */ y24 = L_add(t0,t4); y25 = L_add(t1,t5); y26 = L_sub(t2,t6); y27 = L_sub(t3,t7); y28 = L_sub(t0,t4); y29 = L_sub(t1,t5); y30 = L_add(t2,t6); y31 = L_add(t3,t7); } ELSE { /* Pre-additions */ t0 = L_add(re[s* 0],re[s* 8]); t2 = L_sub(re[s* 0],re[s* 8]); t1 = L_add(im[s* 0],im[s* 8]); t3 = L_sub(im[s* 0],im[s* 8]); t4 = L_add(re[s* 4],re[s*12]); t7 = L_sub(re[s* 4],re[s*12]); t5 = L_add(im[s*12],im[s* 4]); t6 = L_sub(im[s*12],im[s* 4]); /* Post-additions */ y00 = L_add(t0,t4); y01 = L_add(t1,t5); y02 = L_sub(t2,t6); y03 = L_sub(t3,t7); y04 = L_sub(t0,t4); y05 = L_sub(t1,t5); y06 = L_add(t2,t6); y07 = L_add(t3,t7); /* Pre-additions */ t0 = L_add(re[s* 1],re[s* 9]); t2 = L_sub(re[s* 1],re[s* 9]); t1 = L_add(im[s* 1],im[s* 9]); t3 = L_sub(im[s* 1],im[s* 9]); t4 = L_add(re[s* 5],re[s*13]); t7 = L_sub(re[s* 5],re[s*13]); t5 = L_add(im[s*13],im[s* 5]); t6 = L_sub(im[s*13],im[s* 5]); /* Post-additions */ y08 = L_add(t0,t4); y09 = L_add(t1,t5); y10 = L_sub(t2,t6); y11 = L_sub(t3,t7); y12 = L_sub(t0,t4); y13 = L_sub(t1,t5); y14 = L_add(t2,t6); y15 = L_add(t3,t7); /* Pre-additions */ t0 = L_add(re[s* 2],re[s*10]); t2 = L_sub(re[s* 2],re[s*10]); t1 = L_add(im[s* 2],im[s*10]); t3 = L_sub(im[s* 2],im[s*10]); t4 = L_add(re[s* 6],re[s*14]); t7 = L_sub(re[s* 6],re[s*14]); t5 = L_add(im[s*14],im[s* 6]); t6 = L_sub(im[s*14],im[s* 6]); /* Post-additions */ y16 = L_add(t0,t4); y17 = L_add(t1,t5); y18 = L_sub(t2,t6); y19 = L_sub(t3,t7); y20 = L_sub(t1,t5); y21 = L_sub(t4,t0); y22 = L_add(t2,t6); y23 = L_add(t3,t7); /* Pre-additions */ t0 = L_add(re[s* 3],re[s*11]); t2 = L_sub(re[s* 3],re[s*11]); t1 = L_add(im[s* 3],im[s*11]); t3 = L_sub(im[s* 3],im[s*11]); t4 = L_add(re[s* 7],re[s*15]); t7 = L_sub(re[s* 7],re[s*15]); t5 = L_add(im[s*15],im[s* 7]); t6 = L_sub(im[s*15],im[s* 7]); /* Post-additions */ y24 = L_add(t0,t4); y25 = L_add(t1,t5); y26 = L_sub(t2,t6); y27 = L_sub(t3,t7); y28 = L_sub(t0,t4); y29 = L_sub(t1,t5); y30 = L_add(t2,t6); y31 = L_add(t3,t7); } /* rotation */ x0 = Mpy_32_xx(y22,C162); x1 = Mpy_32_xx(y23,C162); y22 = L_sub(x0,x1); y23 = L_add(x0,x1); x0 = Mpy_32_xx(y28,C162); x1 = Mpy_32_xx(y29,C162); y28 = L_sub(x0,x1); y29 = L_add(x0,x1); x0 = Mpy_32_xx(y12,C161); x1 = Mpy_32_xx(y13,C161); y12 = L_add(x0,x1); y13 = L_sub(x1,x0); x0 = Mpy_32_xx(y18,C161); x1 = Mpy_32_xx(y19,C161); y18 = L_add(x0,x1); y19 = L_sub(x1,x0); x0 = Mpy_32_xx(y10,C163); x1 = Mpy_32_xx(y11,C166); x2 = Mpy_32_xx(y10,C166); x3 = Mpy_32_xx(y11,C163); y10 = L_sub(x0,x1); y11 = L_add(x2,x3); x0 = Mpy_32_xx(y14,C165); x1 = Mpy_32_xx(y15,C164); x2 = Mpy_32_xx(y14,C164); x3 = Mpy_32_xx(y15,C165); y14 = L_sub(x0,x1); y15 = L_add(x2,x3); x0 = Mpy_32_xx(y26,C165); x1 = Mpy_32_xx(y27,C164); x2 = Mpy_32_xx(y26,C164); x3 = Mpy_32_xx(y27,C165); y26 = L_sub(x0,x1); y27 = L_add(x2,x3); x0 = Mpy_32_xx(y30,C164); x1 = Mpy_32_xx(y31,C165); x2 = Mpy_32_xx(y30,C165); x3 = Mpy_32_xx(y31,C164); y30 = L_sub(x0,x1); y31 = L_add(x2,x3); /* Pre-additions */ t0 = L_add(y00,y16); t2 = L_sub(y00,y16); t1 = L_add(y01,y17); t3 = L_sub(y01,y17); t4 = L_add(y08,y24); t7 = L_sub(y08,y24); t5 = L_add(y25,y09); t6 = L_sub(y25,y09); /* Post-additions */ re[s* 0] = L_add(t0,t4); move32(); im[s* 0] = L_add(t1,t5); move32(); re[s* 4] = L_sub(t2,t6); move32(); im[s* 4] = L_sub(t3,t7); move32(); re[s* 8] = L_sub(t0,t4); move32(); im[s* 8] = L_sub(t1,t5); move32(); re[s*12] = L_add(t2,t6); move32(); im[s*12] = L_add(t3,t7); move32(); /* Pre-additions */ t0 = L_add(y02,y18); t2 = L_sub(y02,y18); t1 = L_add(y03,y19); t3 = L_sub(y03,y19); t4 = L_add(y10,y26); t7 = L_sub(y10,y26); t5 = L_add(y27,y11); t6 = L_sub(y27,y11); /* Post-additions */ re[s* 1] = L_add(t0,t4); move32(); im[s* 1] = L_add(t1,t5); move32(); re[s* 5] = L_sub(t2,t6); move32(); im[s* 5] = L_sub(t3,t7); move32(); re[s* 9] = L_sub(t0,t4); move32(); im[s* 9] = L_sub(t1,t5); move32(); re[s*13] = L_add(t2,t6); move32(); im[s*13] = L_add(t3,t7); move32(); /* Pre-additions */ t0 = L_add(y04,y20); t2 = L_sub(y04,y20); t1 = L_add(y05,y21); t3 = L_sub(y05,y21); t4 = L_add(y12,y28); t7 = L_sub(y12,y28); t5 = L_add(y29,y13); t6 = L_sub(y29,y13); /* Post-additions */ re[s* 2] = L_add(t0,t4); move32(); im[s* 2] = L_add(t1,t5); move32(); re[s* 6] = L_sub(t2,t6); move32(); im[s* 6] = L_sub(t3,t7); move32(); re[s*10] = L_sub(t0,t4); move32(); im[s*10] = L_sub(t1,t5); move32(); re[s*14] = L_add(t2,t6); move32(); im[s*14] = L_add(t3,t7); move32(); /* Pre-additions */ t0 = L_add(y06,y22); t2 = L_sub(y06,y22); t1 = L_add(y07,y23); t3 = L_sub(y07,y23); t4 = L_add(y14,y30); t7 = L_sub(y14,y30); t5 = L_add(y31,y15); t6 = L_sub(y31,y15); /* Post-additions */ re[s* 3] = L_add(t0,t4); move32(); im[s* 3] = L_add(t1,t5); move32(); re[s* 7] = L_sub(t2,t6); move32(); im[s* 7] = L_sub(t3,t7); move32(); re[s*11] = L_sub(t0,t4); move32(); im[s*11] = L_sub(t1,t5); move32(); re[s*15] = L_add(t2,t6); move32(); im[s*15] = L_add(t3,t7); move32(); } /** * \brief Function performs a complex 20-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR20 bits. * * WOPS with 32x16 bit multiplications: 432 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ static void fft20(Word32 *re, Word32 *im, Word16 s) { Word32 r1,r2,r3,r4; Word32 s1,s2,s3,s4; Word32 x0,x1,x2,x3,x4; Word32 t,t0,t1,t2,t3,t4,t5,t6,t7; Word32 y00,y01,y02,y03,y04,y05,y06,y07,y08,y09; Word32 y10,y11,y12,y13,y14,y15,y16,y17,y18,y19; Word32 y20,y21,y22,y23,y24,y25,y26,y27,y28,y29; Word32 y30,y31,y32,y33,y34,y35,y36,y37,y38,y39; /* */ /* 1. FFT5 stage */ /* real part */ x0 = L_shr(re[s* 0],SCALEFACTOR20); x1 = L_shr(re[s*16],SCALEFACTOR20); x2 = L_shr(re[s*12],SCALEFACTOR20); x3 = L_shr(re[s* 8],SCALEFACTOR20); x4 = L_shr(re[s* 4],SCALEFACTOR20); r1 = L_add(x1,x4); r4 = L_sub(x1,x4); r3 = L_add(x2,x3); r2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y00 = L_add(x0,r1); r1 = L_add(y00,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4, C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ x0 = L_shr(im[s* 0],SCALEFACTOR20); x1 = L_shr(im[s*16],SCALEFACTOR20); x2 = L_shr(im[s*12],SCALEFACTOR20); x3 = L_shr(im[s* 8],SCALEFACTOR20); x4 = L_shr(im[s* 4],SCALEFACTOR20); s1 = L_add(x1,x4); s4 = L_sub(x1,x4); s3 = L_add(x2,x3); s2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y01 = L_add(x0,s1); s1 = L_add(y01,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y08 = L_add(r1,s2); y32 = L_sub(r1,s2); y16 = L_sub(r3,s4); y24 = L_add(r3,s4); y09 = L_sub(s1,r2); y33 = L_add(s1,r2); y17 = L_add(s3,r4); y25 = L_sub(s3,r4); /* 2. FFT5 stage */ /* real part */ x0 = L_shr(re[s* 5],SCALEFACTOR20); x1 = L_shr(re[s* 1],SCALEFACTOR20); x2 = L_shr(re[s*17],SCALEFACTOR20); x3 = L_shr(re[s*13],SCALEFACTOR20); x4 = L_shr(re[s* 9],SCALEFACTOR20); r1 = L_add(x1,x4); r4 = L_sub(x1,x4); r3 = L_add(x2,x3); r2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y02 = L_add(x0,r1); r1 = L_add(y02,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4, C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ x0 = L_shr(im[s* 5],SCALEFACTOR20); x1 = L_shr(im[s* 1],SCALEFACTOR20); x2 = L_shr(im[s*17],SCALEFACTOR20); x3 = L_shr(im[s*13],SCALEFACTOR20); x4 = L_shr(im[s* 9],SCALEFACTOR20); s1 = L_add(x1,x4); s4 = L_sub(x1,x4); s3 = L_add(x2,x3); s2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y03 = L_add(x0,s1); s1 = L_add(y03,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y10 = L_add(r1,s2); y34 = L_sub(r1,s2); y18 = L_sub(r3,s4); y26 = L_add(r3,s4); y11 = L_sub(s1,r2); y35 = L_add(s1,r2); y19 = L_add(s3,r4); y27 = L_sub(s3,r4); /* 3. FFT5 stage */ /* real part */ x0 = L_shr(re[s*10],SCALEFACTOR20); x1 = L_shr(re[s* 6],SCALEFACTOR20); x2 = L_shr(re[s* 2],SCALEFACTOR20); x3 = L_shr(re[s*18],SCALEFACTOR20); x4 = L_shr(re[s*14],SCALEFACTOR20); r1 = L_add(x1,x4); r4 = L_sub(x1,x4); r3 = L_add(x2,x3); r2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y04 = L_add(x0,r1); r1 = L_add(y04,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4, C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ x0 = L_shr(im[s*10],SCALEFACTOR20); x1 = L_shr(im[s* 6],SCALEFACTOR20); x2 = L_shr(im[s* 2],SCALEFACTOR20); x3 = L_shr(im[s*18],SCALEFACTOR20); x4 = L_shr(im[s*14],SCALEFACTOR20); s1 = L_add(x1,x4); s4 = L_sub(x1,x4); s3 = L_add(x2,x3); s2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y05 = L_add(x0,s1); s1 = L_add(y05,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y12 = L_add(r1,s2); y36 = L_sub(r1,s2); y20 = L_sub(r3,s4); y28 = L_add(r3,s4); y13 = L_sub(s1,r2); y37 = L_add(s1,r2); y21 = L_add(s3,r4); y29 = L_sub(s3,r4); /* 4. FFT5 stage */ /* real part */ x0 = L_shr(re[s*15],SCALEFACTOR20); x1 = L_shr(re[s*11],SCALEFACTOR20); x2 = L_shr(re[s* 7],SCALEFACTOR20); x3 = L_shr(re[s* 3],SCALEFACTOR20); x4 = L_shr(re[s*19],SCALEFACTOR20); r1 = L_add(x1,x4); r4 = L_sub(x1,x4); r3 = L_add(x2,x3); r2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y06 = L_add(x0,r1); r1 = L_add(y06,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4, C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ x0 = L_shr(im[s*15],SCALEFACTOR20); x1 = L_shr(im[s*11],SCALEFACTOR20); x2 = L_shr(im[s* 7],SCALEFACTOR20); x3 = L_shr(im[s* 3],SCALEFACTOR20); x4 = L_shr(im[s*19],SCALEFACTOR20); s1 = L_add(x1,x4); s4 = L_sub(x1,x4); s3 = L_add(x2,x3); s2 = L_sub(x2,x3); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y07 = L_add(x0,s1); s1 = L_add(y07,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y14 = L_add(r1,s2); y38 = L_sub(r1,s2); y22 = L_sub(r3,s4); y30 = L_add(r3,s4); y15 = L_sub(s1,r2); y39 = L_add(s1,r2); y23 = L_add(s3,r4); y31 = L_sub(s3,r4); /* 1. FFT4 stage */ /* Pre-additions */ t0 = L_add(y00,y04); t2 = L_sub(y00,y04); t1 = L_add(y01,y05); t3 = L_sub(y01,y05); t4 = L_add(y02,y06); t7 = L_sub(y02,y06); t5 = L_add(y07,y03); t6 = L_sub(y07,y03); /* Post-additions */ re[s* 0] = L_add(t0,t4); move32(); im[s* 0] = L_add(t1,t5); move32(); re[s* 5] = L_sub(t2,t6); move32(); im[s* 5] = L_sub(t3,t7); move32(); re[s*10] = L_sub(t0,t4); move32(); im[s*10] = L_sub(t1,t5); move32(); re[s*15] = L_add(t2,t6); move32(); im[s*15] = L_add(t3,t7); move32(); /* 2. FFT4 stage */ /* Pre-additions */ t0 = L_add(y08,y12); t2 = L_sub(y08,y12); t1 = L_add(y09,y13); t3 = L_sub(y09,y13); t4 = L_add(y10,y14); t7 = L_sub(y10,y14); t5 = L_add(y15,y11); t6 = L_sub(y15,y11); /* Post-additions */ re[s* 4] = L_add(t0,t4); move32(); im[s* 4] = L_add(t1,t5); move32(); re[s* 9] = L_sub(t2,t6); move32(); im[s* 9] = L_sub(t3,t7); move32(); re[s*14] = L_sub(t0,t4); move32(); im[s*14] = L_sub(t1,t5); move32(); re[s*19] = L_add(t2,t6); move32(); im[s*19] = L_add(t3,t7); move32(); /* 3. FFT4 stage */ /* Pre-additions */ t0 = L_add(y16,y20); t2 = L_sub(y16,y20); t1 = L_add(y17,y21); t3 = L_sub(y17,y21); t4 = L_add(y18,y22); t7 = L_sub(y18,y22); t5 = L_add(y23,y19); t6 = L_sub(y23,y19); /* Post-additions */ re[s* 8] = L_add(t0,t4); move32(); im[s* 8] = L_add(t1,t5); move32(); re[s*13] = L_sub(t2,t6); move32(); im[s*13] = L_sub(t3,t7); move32(); re[s*18] = L_sub(t0,t4); move32(); im[s*18] = L_sub(t1,t5); move32(); re[s* 3] = L_add(t2,t6); move32(); im[s* 3] = L_add(t3,t7); move32(); /* 4. FFT4 stage */ /* Pre-additions */ t0 = L_add(y24,y28); t2 = L_sub(y24,y28); t1 = L_add(y25,y29); t3 = L_sub(y25,y29); t4 = L_add(y26,y30); t7 = L_sub(y26,y30); t5 = L_add(y31,y27); t6 = L_sub(y31,y27); /* Post-additions */ re[s*12] = L_add(t0,t4); move32(); im[s*12] = L_add(t1,t5); move32(); re[s*17] = L_sub(t2,t6); move32(); im[s*17] = L_sub(t3,t7); move32(); re[s* 2] = L_sub(t0,t4); move32(); im[s* 2] = L_sub(t1,t5); move32(); re[s* 7] = L_add(t2,t6); move32(); im[s* 7] = L_add(t3,t7); move32(); /* 5. FFT4 stage */ /* Pre-additions */ t0 = L_add(y32,y36); t2 = L_sub(y32,y36); t1 = L_add(y33,y37); t3 = L_sub(y33,y37); t4 = L_add(y34,y38); t7 = L_sub(y34,y38); t5 = L_add(y39,y35); t6 = L_sub(y39,y35); /* Post-additions */ re[s*16] = L_add(t0,t4); move32(); im[s*16] = L_add(t1,t5); move32(); re[s* 1] = L_sub(t2,t6); move32(); im[s* 1] = L_sub(t3,t7); move32(); re[s* 6] = L_sub(t0,t4); move32(); im[s* 6] = L_sub(t1,t5); move32(); re[s*11] = L_add(t2,t6); move32(); im[s*11] = L_add(t3,t7); move32(); /* */ } /** * \brief Function performs a complex 30-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR30 bits. * * WOPS with 32x16 bit multiplications: 828 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ static void fft30(Word32 *re, Word32 *im, Word16 s) { Word32 t; Word32 r1,r2,r3,r4; Word32 s1,s2,s3,s4; Word32 x00,x01,x02,x03,x04,x05,x06,x07,x08,x09; Word32 x10,x11,x12,x13,x14,x15,x16,x17,x18,x19; Word32 x20,x21,x22,x23,x24,x25,x26,x27,x28,x29; Word32 y00,y01,y02,y03,y04,y05,y06,y07,y08,y09; Word32 y10,y11,y12,y13,y14,y15,y16,y17,y18,y19; Word32 y20,y21,y22,y23,y24,y25,y26,y27,y28,y29; Word32 z00,z01,z02,z03,z04,z05,z06,z07,z08,z09; Word32 z10,z11,z12,z13,z14,z15,z16,z17,z18,z19; Word32 z20,z21,z22,z23,z24,z25,z26,z27,z28,z29; Word32 z30,z31,z32,z33,z34,z35,z36,z37,z38,z39; Word32 z40,z41,z42,z43,z44,z45,z46,z47,z48,z49; Word32 z50,z51,z52,z53,z54,z55,z56,z57,z58,z59; Word32 *rel = &re[s* 0]; Word32 *reh = &re[s*15]; Word32 *iml = &im[s* 0]; Word32 *imh = &im[s*15]; /* 1. FFT15 stage */ x00 = L_shr(re[s* 0],SCALEFACTOR30_1); x01 = L_shr(im[s* 0],SCALEFACTOR30_1); x02 = L_shr(re[s*18],SCALEFACTOR30_1); x03 = L_shr(im[s*18],SCALEFACTOR30_1); x04 = L_shr(re[s* 6],SCALEFACTOR30_1); x05 = L_shr(im[s* 6],SCALEFACTOR30_1); x06 = L_shr(re[s*24],SCALEFACTOR30_1); x07 = L_shr(im[s*24],SCALEFACTOR30_1); x08 = L_shr(re[s*12],SCALEFACTOR30_1); x09 = L_shr(im[s*12],SCALEFACTOR30_1); x10 = L_shr(re[s*20],SCALEFACTOR30_1); x11 = L_shr(im[s*20],SCALEFACTOR30_1); x12 = L_shr(re[s* 8],SCALEFACTOR30_1); x13 = L_shr(im[s* 8],SCALEFACTOR30_1); x14 = L_shr(re[s*26],SCALEFACTOR30_1); x15 = L_shr(im[s*26],SCALEFACTOR30_1); x16 = L_shr(re[s*14],SCALEFACTOR30_1); x17 = L_shr(im[s*14],SCALEFACTOR30_1); x18 = L_shr(re[s* 2],SCALEFACTOR30_1); x19 = L_shr(im[s* 2],SCALEFACTOR30_1); x20 = L_shr(re[s*10],SCALEFACTOR30_1); x21 = L_shr(im[s*10],SCALEFACTOR30_1); x22 = L_shr(re[s*28],SCALEFACTOR30_1); x23 = L_shr(im[s*28],SCALEFACTOR30_1); x24 = L_shr(re[s*16],SCALEFACTOR30_1); x25 = L_shr(im[s*16],SCALEFACTOR30_1); x26 = L_shr(re[s* 4],SCALEFACTOR30_1); x27 = L_shr(im[s* 4],SCALEFACTOR30_1); x28 = L_shr(re[s*22],SCALEFACTOR30_1); x29 = L_shr(im[s*22],SCALEFACTOR30_1); /* 1. FFT5 stage */ /* real part */ r1 = L_add(x02,x08); r4 = L_sub(x02,x08); r3 = L_add(x04,x06); r2 = L_sub(x04,x06); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y00 = L_add(x00,r1); r1 = L_add(y00,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4,C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ s1 = L_add(x03,x09); s4 = L_sub(x03,x09); s3 = L_add(x05,x07); s2 = L_sub(x05,x07); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y01 = L_add(x01,s1); s1 = L_add(y01,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y02 = L_add(r1,s2); y08 = L_sub(r1,s2); y04 = L_sub(r3,s4); y06 = L_add(r3,s4); y03 = L_sub(s1,r2); y09 = L_add(s1,r2); y05 = L_add(s3,r4); y07 = L_sub(s3,r4); /* 2. FFT5 stage */ /* real part */ r1 = L_add(x12,x18); r4 = L_sub(x12,x18); r3 = L_add(x14,x16); r2 = L_sub(x14,x16); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y10 = L_add(x10,r1); r1 = L_add(y10,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4,C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ s1 = L_add(x13,x19); s4 = L_sub(x13,x19); s3 = L_add(x15,x17); s2 = L_sub(x15,x17); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y11 = L_add(x11,s1); s1 = L_add(y11,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y12 = L_add(r1,s2); y18 = L_sub(r1,s2); y14 = L_sub(r3,s4); y16 = L_add(r3,s4); y13 = L_sub(s1,r2); y19 = L_add(s1,r2); y15 = L_add(s3,r4); y17 = L_sub(s3,r4); /* 3. FFT5 stage */ /* real part */ r1 = L_add(x22,x28); r4 = L_sub(x22,x28); r3 = L_add(x24,x26); r2 = L_sub(x24,x26); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y20 = L_add(x20,r1); r1 = L_add(y20,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4,C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ s1 = L_add(x23,x29); s4 = L_sub(x23,x29); s3 = L_add(x25,x27); s2 = L_sub(x25,x27); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y21 = L_add(x21,s1); s1 = L_add(y21,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y22 = L_add(r1,s2); y28 = L_sub(r1,s2); y24 = L_sub(r3,s4); y26 = L_add(r3,s4); y23 = L_sub(s1,r2); y29 = L_add(s1,r2); y25 = L_add(s3,r4); y27 = L_sub(s3,r4); /* 1. FFT3 stage */ /* real part */ r1 = L_add(y10,y20); r2 = Mpy_32_xx(L_sub(y10,y20),C31); z00 = L_add(y00,r1); r1 = L_sub(y00,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y11,y21); s2 = Mpy_32_xx(L_sub(y11,y21),C31); z01 = L_add(y01,s1); s1 = L_sub(y01,L_shr(s1,1)); /* combination */ z20 = L_sub(r1,s2); z10 = L_add(r1,s2); z21 = L_add(s1,r2); z11 = L_sub(s1,r2); /* 2. FFT3 stage */ /* real part */ r1 = L_add(y12,y22); r2 = Mpy_32_xx(L_sub(y12,y22),C31); z12 = L_add(y02,r1); r1 = L_sub(y02,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y13,y23); s2 = Mpy_32_xx(L_sub(y13,y23),C31); z13 = L_add(y03,s1); s1 = L_sub(y03,L_shr(s1,1)); /* combination */ z02 = L_sub(r1,s2); z22 = L_add(r1,s2); z03 = L_add(s1,r2); z23 = L_sub(s1,r2); /* 3. FFT3 stage */ /* real part */ r1 = L_add(y14,y24); r2 = Mpy_32_xx(L_sub(y14,y24),C31); z24 = L_add(y04,r1); r1 = L_sub(y04,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y15,y25); s2 = Mpy_32_xx(L_sub(y15,y25),C31); z25 = L_add(y05,s1); s1 = L_sub(y05,L_shr(s1,1)); /* combination */ z14 = L_sub(r1,s2); z04 = L_add(r1,s2); z15 = L_add(s1,r2); z05 = L_sub(s1,r2); /* 4. FFT3 stage */ /* real part */ r1 = L_add(y16,y26); r2 = Mpy_32_xx(L_sub(y16,y26),C31); z06 = L_add(y06,r1); r1 = L_sub(y06,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y17,y27); s2 = Mpy_32_xx(L_sub(y17,y27),C31); z07 = L_add(y07,s1); s1 = L_sub(y07,L_shr(s1,1)); /* combination */ z26 = L_sub(r1,s2); z16 = L_add(r1,s2); z27 = L_add(s1,r2); z17 = L_sub(s1,r2); /* 5. FFT3 stage */ /* real part */ r1 = L_add(y18,y28); r2 = Mpy_32_xx(L_sub(y18,y28),C31); z18 = L_add(y08,r1); r1 = L_sub(y08,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y19,y29); s2 = Mpy_32_xx(L_sub(y19,y29),C31); z19 = L_add(y09,s1); s1 = L_sub(y09,L_shr(s1,1)); /* combination */ z08 = L_sub(r1,s2); z28 = L_add(r1,s2); z09 = L_add(s1,r2); z29 = L_sub(s1,r2); /* 2. FFT15 stage */ x00 = L_shr(re[s*15],SCALEFACTOR30_1); x01 = L_shr(im[s*15],SCALEFACTOR30_1); x02 = L_shr(re[s* 3],SCALEFACTOR30_1); x03 = L_shr(im[s* 3],SCALEFACTOR30_1); x04 = L_shr(re[s*21],SCALEFACTOR30_1); x05 = L_shr(im[s*21],SCALEFACTOR30_1); x06 = L_shr(re[s* 9],SCALEFACTOR30_1); x07 = L_shr(im[s* 9],SCALEFACTOR30_1); x08 = L_shr(re[s*27],SCALEFACTOR30_1); x09 = L_shr(im[s*27],SCALEFACTOR30_1); x10 = L_shr(re[s* 5],SCALEFACTOR30_1); x11 = L_shr(im[s* 5],SCALEFACTOR30_1); x12 = L_shr(re[s*23],SCALEFACTOR30_1); x13 = L_shr(im[s*23],SCALEFACTOR30_1); x14 = L_shr(re[s*11],SCALEFACTOR30_1); x15 = L_shr(im[s*11],SCALEFACTOR30_1); x16 = L_shr(re[s*29],SCALEFACTOR30_1); x17 = L_shr(im[s*29],SCALEFACTOR30_1); x18 = L_shr(re[s*17],SCALEFACTOR30_1); x19 = L_shr(im[s*17],SCALEFACTOR30_1); x20 = L_shr(re[s*25],SCALEFACTOR30_1); x21 = L_shr(im[s*25],SCALEFACTOR30_1); x22 = L_shr(re[s*13],SCALEFACTOR30_1); x23 = L_shr(im[s*13],SCALEFACTOR30_1); x24 = L_shr(re[s* 1],SCALEFACTOR30_1); x25 = L_shr(im[s* 1],SCALEFACTOR30_1); x26 = L_shr(re[s*19],SCALEFACTOR30_1); x27 = L_shr(im[s*19],SCALEFACTOR30_1); x28 = L_shr(re[s* 7],SCALEFACTOR30_1); x29 = L_shr(im[s* 7],SCALEFACTOR30_1); /* 1. FFT5 stage */ /* real part */ r1 = L_add(x02,x08); r4 = L_sub(x02,x08); r3 = L_add(x04,x06); r2 = L_sub(x04,x06); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y00 = L_add(x00,r1); r1 = L_add(y00,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4,C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ s1 = L_add(x03,x09); s4 = L_sub(x03,x09); s3 = L_add(x05,x07); s2 = L_sub(x05,x07); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y01 = L_add(x01,s1); s1 = L_add(y01,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y02 = L_add(r1,s2); y08 = L_sub(r1,s2); y04 = L_sub(r3,s4); y06 = L_add(r3,s4); y03 = L_sub(s1,r2); y09 = L_add(s1,r2); y05 = L_add(s3,r4); y07 = L_sub(s3,r4); /* 2. FFT5 stage */ /* real part */ r1 = L_add(x12,x18); r4 = L_sub(x12,x18); r3 = L_add(x14,x16); r2 = L_sub(x14,x16); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y10 = L_add(x10,r1); r1 = L_add(y10,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4,C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ s1 = L_add(x13,x19); s4 = L_sub(x13,x19); s3 = L_add(x15,x17); s2 = L_sub(x15,x17); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y11 = L_add(x11,s1); s1 = L_add(y11,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y12 = L_add(r1,s2); y18 = L_sub(r1,s2); y14 = L_sub(r3,s4); y16 = L_add(r3,s4); y13 = L_sub(s1,r2); y19 = L_add(s1,r2); y15 = L_add(s3,r4); y17 = L_sub(s3,r4); /* 3. FFT5 stage */ /* real part */ r1 = L_add(x22,x28); r4 = L_sub(x22,x28); r3 = L_add(x24,x26); r2 = L_sub(x24,x26); t = Mpy_32_xx(L_sub(r1,r3),C54); r1 = L_add(r1,r3); y20 = L_add(x20,r1); r1 = L_add(y20,(L_shl(Mpy_32_xx(r1,C55),1))); r3 = L_sub(r1,t); r1 = L_add(r1,t); t = Mpy_32_xx((L_add(r4,r2)),C51); r4 = L_add(t,L_shl(Mpy_32_xx(r4,C52),1)); r2 = L_add(t,Mpy_32_xx(r2,C53)); /* imaginary part */ s1 = L_add(x23,x29); s4 = L_sub(x23,x29); s3 = L_add(x25,x27); s2 = L_sub(x25,x27); t = Mpy_32_xx(L_sub(s1,s3),C54); s1 = L_add(s1,s3); y21 = L_add(x21,s1); s1 = L_add(y21,L_shl(Mpy_32_xx(s1,C55),1)); s3 = L_sub(s1,t); s1 = L_add(s1,t); t = Mpy_32_xx(L_add(s4,s2),C51); s4 = L_add(t,L_shl(Mpy_32_xx(s4,C52),1)); s2 = L_add(t,Mpy_32_xx(s2,C53)); /* combination */ y22 = L_add(r1,s2); y28 = L_sub(r1,s2); y24 = L_sub(r3,s4); y26 = L_add(r3,s4); y23 = L_sub(s1,r2); y29 = L_add(s1,r2); y25 = L_add(s3,r4); y27 = L_sub(s3,r4); /* 1. FFT3 stage */ /* real part */ r1 = L_add(y10,y20); r2 = Mpy_32_xx(L_sub(y10,y20),C31); z30 = L_add(y00,r1); r1 = L_sub(y00,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y11,y21); s2 = Mpy_32_xx(L_sub(y11,y21),C31); z31 = L_add(y01,s1); s1 = L_sub(y01,L_shr(s1,1)); /* combination */ z50 = L_sub(r1,s2); z40 = L_add(r1,s2); z51 = L_add(s1,r2); z41 = L_sub(s1,r2); /* 2. FFT3 stage */ /* real part */ r1 = L_add(y12,y22); r2 = Mpy_32_xx(L_sub(y12,y22),C31); z42 = L_add(y02,r1); r1 = L_sub(y02,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y13,y23); s2 = Mpy_32_xx(L_sub(y13,y23),C31); z43 = L_add(y03,s1); s1 = L_sub(y03,L_shr(s1,1)); /* combination */ z32 = L_sub(r1,s2); z52 = L_add(r1,s2); z33 = L_add(s1,r2); z53 = L_sub(s1,r2); /* 3. FFT3 stage */ /* real part */ r1 = L_add(y14,y24); r2 = Mpy_32_xx(L_sub(y14,y24),C31); z54 = L_add(y04,r1); r1 = L_sub(y04,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y15,y25); s2 = Mpy_32_xx(L_sub(y15,y25),C31); z55 = L_add(y05,s1); s1 = L_sub(y05,L_shr(s1,1)); /* combination */ z44 = L_sub(r1,s2); z34 = L_add(r1,s2); z45 = L_add(s1,r2); z35 = L_sub(s1,r2); /* 4. FFT3 stage */ /* real part */ r1 = L_add(y16,y26); r2 = Mpy_32_xx(L_sub(y16,y26),C31); z36 = L_add(y06,r1); r1 = L_sub(y06,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y17,y27); s2 = Mpy_32_xx(L_sub(y17,y27),C31); z37 = L_add(y07,s1); s1 = L_sub(y07,L_shr(s1,1)); /* combination */ z56 = L_sub(r1,s2); z46 = L_add(r1,s2); z57 = L_add(s1,r2); z47 = L_sub(s1,r2); /* 5. FFT3 stage */ /* real part */ r1 = L_add(y18,y28); r2 = Mpy_32_xx(L_sub(y18,y28),C31); z48 = L_add(y08,r1); r1 = L_sub(y08,L_shr(r1,1)); /* imaginary part */ s1 = L_add(y19,y29); s2 = Mpy_32_xx(L_sub(y19,y29),C31); z49 = L_add(y09,s1); s1 = L_sub(y09,L_shr(s1,1)); /* combination */ z38 = L_sub(r1,s2); z58 = L_add(r1,s2); z39 = L_add(s1,r2); z59 = L_sub(s1,r2); /* 1. FFT2 stage */ r1 = L_shr(z00,SCALEFACTOR30_2); r2 = L_shr(z30,SCALEFACTOR30_2); r3 = L_shr(z01,SCALEFACTOR30_2); r4 = L_shr(z31,SCALEFACTOR30_2); *rel = L_add(r1,r2); move32(); *reh = L_sub(r1,r2); move32(); *iml = L_add(r3,r4); move32(); *imh = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 2. FFT2 stage */ r1 = L_shr(z16,SCALEFACTOR30_2); r2 = L_shr(z46,SCALEFACTOR30_2); r3 = L_shr(z17,SCALEFACTOR30_2); r4 = L_shr(z47,SCALEFACTOR30_2); *reh = L_add(r1,r2); move32(); *rel = L_sub(r1,r2); move32(); *imh = L_add(r3,r4); move32(); *iml = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 3. FFT2 stage */ r1 = L_shr(z02,SCALEFACTOR30_2); r2 = L_shr(z32,SCALEFACTOR30_2); r3 = L_shr(z03,SCALEFACTOR30_2); r4 = L_shr(z33,SCALEFACTOR30_2); *rel = L_add(r1,r2); move32(); *reh = L_sub(r1,r2); move32(); *iml = L_add(r3,r4); move32(); *imh = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 4. FFT2 stage */ r1 = L_shr(z18,SCALEFACTOR30_2); r2 = L_shr(z48,SCALEFACTOR30_2); r3 = L_shr(z19,SCALEFACTOR30_2); r4 = L_shr(z49,SCALEFACTOR30_2); *reh = L_add(r1,r2); move32(); *rel = L_sub(r1,r2); move32(); *imh = L_add(r3,r4); move32(); *iml = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 5. FFT2 stage */ r1 = L_shr(z04,SCALEFACTOR30_2); r2 = L_shr(z34,SCALEFACTOR30_2); r3 = L_shr(z05,SCALEFACTOR30_2); r4 = L_shr(z35,SCALEFACTOR30_2); *rel = L_add(r1,r2); move32(); *reh = L_sub(r1,r2); move32(); *iml = L_add(r3,r4); move32(); *imh = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 6. FFT2 stage */ r1 = L_shr(z20,SCALEFACTOR30_2); r2 = L_shr(z50,SCALEFACTOR30_2); r3 = L_shr(z21,SCALEFACTOR30_2); r4 = L_shr(z51,SCALEFACTOR30_2); *reh = L_add(r1,r2); move32(); *rel = L_sub(r1,r2); move32(); *imh = L_add(r3,r4); move32(); *iml = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 7. FFT2 stage */ r1 = L_shr(z06,SCALEFACTOR30_2); r2 = L_shr(z36,SCALEFACTOR30_2); r3 = L_shr(z07,SCALEFACTOR30_2); r4 = L_shr(z37,SCALEFACTOR30_2); *rel = L_add(r1,r2); move32(); *reh = L_sub(r1,r2); move32(); *iml = L_add(r3,r4); move32(); *imh = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 8. FFT2 stage */ r1 = L_shr(z22,SCALEFACTOR30_2); r2 = L_shr(z52,SCALEFACTOR30_2); r3 = L_shr(z23,SCALEFACTOR30_2); r4 = L_shr(z53,SCALEFACTOR30_2); *reh = L_add(r1,r2); move32(); *rel = L_sub(r1,r2); move32(); *imh = L_add(r3,r4); move32(); *iml = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 9. FFT2 stage */ r1 = L_shr(z08,SCALEFACTOR30_2); r2 = L_shr(z38,SCALEFACTOR30_2); r3 = L_shr(z09,SCALEFACTOR30_2); r4 = L_shr(z39,SCALEFACTOR30_2); *rel = L_add(r1,r2); move32(); *reh = L_sub(r1,r2); move32(); *iml = L_add(r3,r4); move32(); *imh = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 10. FFT2 stage */ r1 = L_shr(z24,SCALEFACTOR30_2); r2 = L_shr(z54,SCALEFACTOR30_2); r3 = L_shr(z25,SCALEFACTOR30_2); r4 = L_shr(z55,SCALEFACTOR30_2); *reh = L_add(r1,r2); move32(); *rel = L_sub(r1,r2); move32(); *imh = L_add(r3,r4); move32(); *iml = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 11. FFT2 stage */ r1 = L_shr(z10,SCALEFACTOR30_2); r2 = L_shr(z40,SCALEFACTOR30_2); r3 = L_shr(z11,SCALEFACTOR30_2); r4 = L_shr(z41,SCALEFACTOR30_2); *rel = L_add(r1,r2); move32(); *reh = L_sub(r1,r2); move32(); *iml = L_add(r3,r4); move32(); *imh = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 12. FFT2 stage */ r1 = L_shr(z26,SCALEFACTOR30_2); r2 = L_shr(z56,SCALEFACTOR30_2); r3 = L_shr(z27,SCALEFACTOR30_2); r4 = L_shr(z57,SCALEFACTOR30_2); *reh = L_add(r1,r2); move32(); *rel = L_sub(r1,r2); move32(); *imh = L_add(r3,r4); move32(); *iml = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 13. FFT2 stage */ r1 = L_shr(z12,SCALEFACTOR30_2); r2 = L_shr(z42,SCALEFACTOR30_2); r3 = L_shr(z13,SCALEFACTOR30_2); r4 = L_shr(z43,SCALEFACTOR30_2); *rel = L_add(r1,r2); move32(); *reh = L_sub(r1,r2); move32(); *iml = L_add(r3,r4); move32(); *imh = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 14. FFT2 stage */ r1 = L_shr(z28,SCALEFACTOR30_2); r2 = L_shr(z58,SCALEFACTOR30_2); r3 = L_shr(z29,SCALEFACTOR30_2); r4 = L_shr(z59,SCALEFACTOR30_2); *reh = L_add(r1,r2); move32(); *rel = L_sub(r1,r2); move32(); *imh = L_add(r3,r4); move32(); *iml = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; /* 15. FFT2 stage */ r1 = L_shr(z14,SCALEFACTOR30_2); r2 = L_shr(z44,SCALEFACTOR30_2); r3 = L_shr(z15,SCALEFACTOR30_2); r4 = L_shr(z45,SCALEFACTOR30_2); *rel = L_add(r1,r2); move32(); *reh = L_sub(r1,r2); move32(); *iml = L_add(r3,r4); move32(); *imh = L_sub(r3,r4); move32(); rel+=s, reh+=s, iml+=s; imh+=s; } /** * \brief Function performs a complex 32-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR32 bits. * * WOPS with 32x16 bit multiplications: 752 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ static void fft32(Word32 * re, Word32 * im, Word16 s) { Word32 as,bs; Word32 x00,x01,x02,x03,x04,x05,x06,x07; Word32 x08,x09,x10,x11,x12,x13,x14,x15; Word32 t00,t01,t02,t03,t04,t05,t06,t07; Word32 t08,t09,t10,t11,t12,t13,t14,t15; Word32 s00,s01,s02,s03,s04,s05,s06,s07; Word32 s08,s09,s10,s11,s12,s13,s14,s15; Word32 y00,y01,y02,y03,y04,y05,y06,y07; Word32 y08,y09,y10,y11,y12,y13,y14,y15; Word32 y16,y17,y18,y19,y20,y21,y22,y23; Word32 y24,y25,y26,y27,y28,y29,y30,y31; Word32 y32,y33,y34,y35,y36,y37,y38,y39; Word32 y40,y41,y42,y43,y44,y45,y46,y47; Word32 y48,y49,y50,y51,y52,y53,y54,y55; Word32 y56,y57,y58,y59,y60,y61,y62,y63; /* 1. FFT8 stage */ x00 = L_shr(re[s* 0],SCALEFACTOR32_1); x01 = L_shr(im[s* 0],SCALEFACTOR32_1); x02 = L_shr(re[s* 4],SCALEFACTOR32_1); x03 = L_shr(im[s* 4],SCALEFACTOR32_1); x04 = L_shr(re[s* 8],SCALEFACTOR32_1); x05 = L_shr(im[s* 8],SCALEFACTOR32_1); x06 = L_shr(re[s*12],SCALEFACTOR32_1); x07 = L_shr(im[s*12],SCALEFACTOR32_1); x08 = L_shr(re[s*16],SCALEFACTOR32_1); x09 = L_shr(im[s*16],SCALEFACTOR32_1); x10 = L_shr(re[s*20],SCALEFACTOR32_1); x11 = L_shr(im[s*20],SCALEFACTOR32_1); x12 = L_shr(re[s*24],SCALEFACTOR32_1); x13 = L_shr(im[s*24],SCALEFACTOR32_1); x14 = L_shr(re[s*28],SCALEFACTOR32_1); x15 = L_shr(im[s*28],SCALEFACTOR32_1); t00 = L_add(x00,x08); t02 = L_sub(x00,x08); t01 = L_add(x01,x09); t03 = L_sub(x01,x09); t04 = L_add(x02,x10); t06 = L_sub(x02,x10); t05 = L_add(x03,x11); t07 = L_sub(x03,x11); t08 = L_add(x04,x12); t10 = L_sub(x04,x12); t09 = L_add(x05,x13); t11 = L_sub(x05,x13); t12 = L_add(x06,x14); t14 = L_sub(x06,x14); t13 = L_add(x07,x15); t15 = L_sub(x07,x15); /* Pre-additions and core multiplications */ s00 = L_add(t00,t08); s04 = L_sub(t00,t08); s01 = L_add(t01,t09); s05 = L_sub(t01,t09); s08 = L_sub(t02,t11); s10 = L_add(t02,t11); s09 = L_add(t03,t10); s11 = L_sub(t03,t10); s02 = L_add(t04,t12); s07 = L_sub(t04,t12); s03 = L_add(t05,t13); s06 = L_sub(t13,t05); t01 = L_add(t06,t14); t02 = L_sub(t06,t14); t00 = L_add(t07,t15); t03 = L_sub(t07,t15); s12 = Mpy_32_xx(L_add(t00,t02),C81); s14 = Mpy_32_xx(L_sub(t00,t02),C81); s13 = Mpy_32_xx(L_sub(t03,t01),C81); s15 = Mpy_32_xx(L_add(t01,t03),C82); /* Post-additions */ y00 = L_add(s00,s02); y08 = L_sub(s00,s02); y01 = L_add(s01,s03); y09 = L_sub(s01,s03); y04 = L_sub(s04,s06); y12 = L_add(s04,s06); y05 = L_sub(s05,s07); y13 = L_add(s05,s07); y06 = L_add(s08,s14); y14 = L_sub(s08,s14); y07 = L_add(s09,s15); y15 = L_sub(s09,s15); y02 = L_add(s10,s12); y10 = L_sub(s10,s12); y03 = L_add(s11,s13); y11 = L_sub(s11,s13); /* 2. FFT8 stage */ x00 = L_shr(re[s* 1],SCALEFACTOR32_1); x01 = L_shr(im[s* 1],SCALEFACTOR32_1); x02 = L_shr(re[s* 5],SCALEFACTOR32_1); x03 = L_shr(im[s* 5],SCALEFACTOR32_1); x04 = L_shr(re[s* 9],SCALEFACTOR32_1); x05 = L_shr(im[s* 9],SCALEFACTOR32_1); x06 = L_shr(re[s*13],SCALEFACTOR32_1); x07 = L_shr(im[s*13],SCALEFACTOR32_1); x08 = L_shr(re[s*17],SCALEFACTOR32_1); x09 = L_shr(im[s*17],SCALEFACTOR32_1); x10 = L_shr(re[s*21],SCALEFACTOR32_1); x11 = L_shr(im[s*21],SCALEFACTOR32_1); x12 = L_shr(re[s*25],SCALEFACTOR32_1); x13 = L_shr(im[s*25],SCALEFACTOR32_1); x14 = L_shr(re[s*29],SCALEFACTOR32_1); x15 = L_shr(im[s*29],SCALEFACTOR32_1); t00 = L_add(x00,x08); t02 = L_sub(x00,x08); t01 = L_add(x01,x09); t03 = L_sub(x01,x09); t04 = L_add(x02,x10); t06 = L_sub(x02,x10); t05 = L_add(x03,x11); t07 = L_sub(x03,x11); t08 = L_add(x04,x12); t10 = L_sub(x04,x12); t09 = L_add(x05,x13); t11 = L_sub(x05,x13); t12 = L_add(x06,x14); t14 = L_sub(x06,x14); t13 = L_add(x07,x15); t15 = L_sub(x07,x15); /* Pre-additions and core multiplications */ s00 = L_add(t00,t08); s04 = L_sub(t00,t08); s01 = L_add(t01,t09); s05 = L_sub(t01,t09); s08 = L_sub(t02,t11); s10 = L_add(t02,t11); s09 = L_add(t03,t10); s11 = L_sub(t03,t10); s02 = L_add(t04,t12); s07 = L_sub(t04,t12); s03 = L_add(t05,t13); s06 = L_sub(t13,t05); t01 = L_add(t06,t14); t02 = L_sub(t06,t14); t00 = L_add(t07,t15); t03 = L_sub(t07,t15); s12 = Mpy_32_xx(L_add(t00,t02),C81); s14 = Mpy_32_xx(L_sub(t00,t02),C81); s13 = Mpy_32_xx(L_sub(t03,t01),C81); s15 = Mpy_32_xx(L_add(t01,t03),C82); /* Post-additions */ y16 = L_add(s00,s02); y24 = L_sub(s00,s02); y17 = L_add(s01,s03); y25 = L_sub(s01,s03); y20 = L_sub(s04,s06); y28 = L_add(s04,s06); y21 = L_sub(s05,s07); y29 = L_add(s05,s07); y22 = L_add(s08,s14); y30 = L_sub(s08,s14); y23 = L_add(s09,s15); y31 = L_sub(s09,s15); y18 = L_add(s10,s12); y26 = L_sub(s10,s12); y19 = L_add(s11,s13); y27 = L_sub(s11,s13); /* 3. FFT8 stage */ x00 = L_shr(re[s* 2],SCALEFACTOR32_1); x01 = L_shr(im[s* 2],SCALEFACTOR32_1); x02 = L_shr(re[s* 6],SCALEFACTOR32_1); x03 = L_shr(im[s* 6],SCALEFACTOR32_1); x04 = L_shr(re[s*10],SCALEFACTOR32_1); x05 = L_shr(im[s*10],SCALEFACTOR32_1); x06 = L_shr(re[s*14],SCALEFACTOR32_1); x07 = L_shr(im[s*14],SCALEFACTOR32_1); x08 = L_shr(re[s*18],SCALEFACTOR32_1); x09 = L_shr(im[s*18],SCALEFACTOR32_1); x10 = L_shr(re[s*22],SCALEFACTOR32_1); x11 = L_shr(im[s*22],SCALEFACTOR32_1); x12 = L_shr(re[s*26],SCALEFACTOR32_1); x13 = L_shr(im[s*26],SCALEFACTOR32_1); x14 = L_shr(re[s*30],SCALEFACTOR32_1); x15 = L_shr(im[s*30],SCALEFACTOR32_1); t00 = L_add(x00,x08); t02 = L_sub(x00,x08); t01 = L_add(x01,x09); t03 = L_sub(x01,x09); t04 = L_add(x02,x10); t06 = L_sub(x02,x10); t05 = L_add(x03,x11); t07 = L_sub(x03,x11); t08 = L_add(x04,x12); t10 = L_sub(x04,x12); t09 = L_add(x05,x13); t11 = L_sub(x05,x13); t12 = L_add(x06,x14); t14 = L_sub(x06,x14); t13 = L_add(x07,x15); t15 = L_sub(x07,x15); /* Pre-additions and core multiplications */ s00 = L_add(t00,t08); s04 = L_sub(t00,t08); s01 = L_add(t01,t09); s05 = L_sub(t01,t09); s08 = L_sub(t02,t11); s10 = L_add(t02,t11); s09 = L_add(t03,t10); s11 = L_sub(t03,t10); s02 = L_add(t04,t12); s07 = L_sub(t04,t12); s03 = L_add(t05,t13); s06 = L_sub(t13,t05); t01 = L_add(t06,t14); t02 = L_sub(t06,t14); t00 = L_add(t07,t15); t03 = L_sub(t07,t15); s12 = Mpy_32_xx(L_add(t00,t02),C81); s14 = Mpy_32_xx(L_sub(t00,t02),C81); s13 = Mpy_32_xx(L_sub(t03,t01),C81); s15 = Mpy_32_xx(L_add(t01,t03),C82); /* Post-additions */ y32 = L_add(s00,s02); y40 = L_sub(s00,s02); y33 = L_add(s01,s03); y41 = L_sub(s01,s03); y36 = L_sub(s04,s06); y44 = L_add(s04,s06); y37 = L_sub(s05,s07); y45 = L_add(s05,s07); y38 = L_add(s08,s14); y46 = L_sub(s08,s14); y39 = L_add(s09,s15); y47 = L_sub(s09,s15); y34 = L_add(s10,s12); y42 = L_sub(s10,s12); y35 = L_add(s11,s13); y43 = L_sub(s11,s13); /* 4. FFT8 stage */ x00 = L_shr(re[s* 3],SCALEFACTOR32_1); x01 = L_shr(im[s* 3],SCALEFACTOR32_1); x02 = L_shr(re[s* 7],SCALEFACTOR32_1); x03 = L_shr(im[s* 7],SCALEFACTOR32_1); x04 = L_shr(re[s*11],SCALEFACTOR32_1); x05 = L_shr(im[s*11],SCALEFACTOR32_1); x06 = L_shr(re[s*15],SCALEFACTOR32_1); x07 = L_shr(im[s*15],SCALEFACTOR32_1); x08 = L_shr(re[s*19],SCALEFACTOR32_1); x09 = L_shr(im[s*19],SCALEFACTOR32_1); x10 = L_shr(re[s*23],SCALEFACTOR32_1); x11 = L_shr(im[s*23],SCALEFACTOR32_1); x12 = L_shr(re[s*27],SCALEFACTOR32_1); x13 = L_shr(im[s*27],SCALEFACTOR32_1); x14 = L_shr(re[s*31],SCALEFACTOR32_1); x15 = L_shr(im[s*31],SCALEFACTOR32_1); t00 = L_add(x00,x08); t02 = L_sub(x00,x08); t01 = L_add(x01,x09); t03 = L_sub(x01,x09); t04 = L_add(x02,x10); t06 = L_sub(x02,x10); t05 = L_add(x03,x11); t07 = L_sub(x03,x11); t08 = L_add(x04,x12); t10 = L_sub(x04,x12); t09 = L_add(x05,x13); t11 = L_sub(x05,x13); t12 = L_add(x06,x14); t14 = L_sub(x06,x14); t13 = L_add(x07,x15); t15 = L_sub(x07,x15); /* Pre-additions and core multiplications */ s00 = L_add(t00,t08); s04 = L_sub(t00,t08); s01 = L_add(t01,t09); s05 = L_sub(t01,t09); s08 = L_sub(t02,t11); s10 = L_add(t02,t11); s09 = L_add(t03,t10); s11 = L_sub(t03,t10); s02 = L_add(t04,t12); s07 = L_sub(t04,t12); s03 = L_add(t05,t13); s06 = L_sub(t13,t05); t01 = L_add(t06,t14); t02 = L_sub(t06,t14); t00 = L_add(t07,t15); t03 = L_sub(t07,t15); s12 = Mpy_32_xx(L_add(t00,t02),C81); s14 = Mpy_32_xx(L_sub(t00,t02),C81); s13 = Mpy_32_xx(L_sub(t03,t01),C81); s15 = Mpy_32_xx(L_add(t01,t03),C82); /* Post-additions */ y48 = L_add(s00,s02); y56 = L_sub(s00,s02); y49 = L_add(s01,s03); y57 = L_sub(s01,s03); y52 = L_sub(s04,s06); y60 = L_add(s04,s06); y53 = L_sub(s05,s07); y61 = L_add(s05,s07); y54 = L_add(s08,s14); y62 = L_sub(s08,s14); y55 = L_add(s09,s15); y63 = L_sub(s09,s15); y50 = L_add(s10,s12); y58 = L_sub(s10,s12); y51 = L_add(s11,s13); y59 = L_sub(s11,s13); /* apply twiddle factors */ y00 = L_shr(y00,SCALEFACTOR32_2); y01 = L_shr(y01,SCALEFACTOR32_2); y02 = L_shr(y02,SCALEFACTOR32_2); y03 = L_shr(y03,SCALEFACTOR32_2); y04 = L_shr(y04,SCALEFACTOR32_2); y05 = L_shr(y05,SCALEFACTOR32_2); y06 = L_shr(y06,SCALEFACTOR32_2); y07 = L_shr(y07,SCALEFACTOR32_2); y08 = L_shr(y08,SCALEFACTOR32_2); y09 = L_shr(y09,SCALEFACTOR32_2); y10 = L_shr(y10,SCALEFACTOR32_2); y11 = L_shr(y11,SCALEFACTOR32_2); y12 = L_shr(y12,SCALEFACTOR32_2); y13 = L_shr(y13,SCALEFACTOR32_2); y14 = L_shr(y14,SCALEFACTOR32_2); y15 = L_shr(y15,SCALEFACTOR32_2); y16 = L_shr(y16,SCALEFACTOR32_2); y17 = L_shr(y17,SCALEFACTOR32_2); y32 = L_shr(y32,SCALEFACTOR32_2); y33 = L_shr(y33,SCALEFACTOR32_2); y48 = L_shr(y48,SCALEFACTOR32_2); y49 = L_shr(y49,SCALEFACTOR32_2); y40 = L_shr(y40,SCALEFACTOR32_2); y41 = L_shr(y41,SCALEFACTOR32_2); cplxMpy3_0(y18, y19, RotVector_32[2* 0+0], RotVector_32[2* 0+1]); cplxMpy3_0(y20, y21, RotVector_32[2* 1+0], RotVector_32[2* 1+1]); cplxMpy3_0(y22, y23, RotVector_32[2* 2+0], RotVector_32[2* 2+1]); cplxMpy3_0(y24, y25, RotVector_32[2* 3+0], RotVector_32[2* 3+1]); cplxMpy3_0(y26, y27, RotVector_32[2* 4+0], RotVector_32[2* 4+1]); cplxMpy3_0(y28, y29, RotVector_32[2* 5+0], RotVector_32[2* 5+1]); cplxMpy3_0(y30, y31, RotVector_32[2* 6+0], RotVector_32[2* 6+1]); cplxMpy3_0(y34, y35, RotVector_32[2* 7+0], RotVector_32[2* 7+1]); cplxMpy3_0(y36, y37, RotVector_32[2* 8+0], RotVector_32[2* 8+1]); cplxMpy3_0(y38, y39, RotVector_32[2* 9+0], RotVector_32[2* 9+1]); cplxMpy3_0(y42, y43, RotVector_32[2*10+0], RotVector_32[2*10+1]); cplxMpy3_0(y44, y45, RotVector_32[2*11+0], RotVector_32[2*11+1]); cplxMpy3_0(y46, y47, RotVector_32[2*12+0], RotVector_32[2*12+1]); cplxMpy3_0(y50, y51, RotVector_32[2*13+0], RotVector_32[2*13+1]); cplxMpy3_0(y52, y53, RotVector_32[2*14+0], RotVector_32[2*14+1]); cplxMpy3_0(y54, y55, RotVector_32[2*15+0], RotVector_32[2*15+1]); cplxMpy3_0(y56, y57, RotVector_32[2*16+0], RotVector_32[2*16+1]); cplxMpy3_0(y58, y59, RotVector_32[2*17+0], RotVector_32[2*17+1]); cplxMpy3_0(y60, y61, RotVector_32[2*18+0], RotVector_32[2*18+1]); cplxMpy3_0(y62, y63, RotVector_32[2*19+0], RotVector_32[2*19+1]); /* 1. FFT4 stage */ /* Pre-additions */ t00 = L_add(y00,y32); t02 = L_sub(y00,y32); t01 = L_add(y01,y33); t03 = L_sub(y01,y33); t04 = L_add(y16,y48); t07 = L_sub(y16,y48); t05 = L_add(y49,y17); t06 = L_sub(y49,y17); /* Post-additions */ re[s* 0] = L_add(t00,t04); move32(); im[s* 0] = L_add(t01,t05); move32(); re[s* 8] = L_sub(t02,t06); move32(); im[s* 8] = L_sub(t03,t07); move32(); re[s*16] = L_sub(t00,t04); move32(); im[s*16] = L_sub(t01,t05); move32(); re[s*24] = L_add(t02,t06); move32(); im[s*24] = L_add(t03,t07); move32(); /* 2. FFT4 stage */ /* Pre-additions */ t00 = L_add(y02,y34); t02 = L_sub(y02,y34); t01 = L_add(y03,y35); t03 = L_sub(y03,y35); t04 = L_add(y18,y50); t07 = L_sub(y18,y50); t05 = L_add(y51,y19); t06 = L_sub(y51,y19); /* Post-additions */ re[s* 1] = L_add(t00,t04); move32(); im[s* 1] = L_add(t01,t05); move32(); re[s* 9] = L_sub(t02,t06); move32(); im[s* 9] = L_sub(t03,t07); move32(); re[s*17] = L_sub(t00,t04); move32(); im[s*17] = L_sub(t01,t05); move32(); re[s*25] = L_add(t02,t06); move32(); im[s*25] = L_add(t03,t07); move32(); /* 3. FFT4 stage */ /* Pre-additions */ t00 = L_add(y04,y36); t02 = L_sub(y04,y36); t01 = L_add(y05,y37); t03 = L_sub(y05,y37); t04 = L_add(y20,y52); t07 = L_sub(y20,y52); t05 = L_add(y53,y21); t06 = L_sub(y53,y21); /* Post-additions */ re[s* 2] = L_add(t00,t04); move32(); im[s* 2] = L_add(t01,t05); move32(); re[s*10] = L_sub(t02,t06); move32(); im[s*10] = L_sub(t03,t07); move32(); re[s*18] = L_sub(t00,t04); move32(); im[s*18] = L_sub(t01,t05); move32(); re[s*26] = L_add(t02,t06); move32(); im[s*26] = L_add(t03,t07); move32(); /* 4. FFT4 stage */ /* Pre-additions */ t00 = L_add(y06,y38); t02 = L_sub(y06,y38); t01 = L_add(y07,y39); t03 = L_sub(y07,y39); t04 = L_add(y22,y54); t07 = L_sub(y22,y54); t05 = L_add(y55,y23); t06 = L_sub(y55,y23); /* Post-additions */ re[s* 3] = L_add(t00,t04); move32(); im[s* 3] = L_add(t01,t05); move32(); re[s*11] = L_sub(t02,t06); move32(); im[s*11] = L_sub(t03,t07); move32(); re[s*19] = L_sub(t00,t04); move32(); im[s*19] = L_sub(t01,t05); move32(); re[s*27] = L_add(t02,t06); move32(); im[s*27] = L_add(t03,t07); move32(); /* 5. FFT4 stage */ /* Pre-additions */ t00 = L_add(y08,y41); t02 = L_sub(y08,y41); t01 = L_sub(y09,y40); t03 = L_add(y09,y40); t04 = L_add(y24,y56); t07 = L_sub(y24,y56); t05 = L_add(y57,y25); t06 = L_sub(y57,y25); /* Post-additions */ re[s* 4] = L_add(t00,t04); move32(); im[s* 4] = L_add(t01,t05); move32(); re[s*12] = L_sub(t02,t06); move32(); im[s*12] = L_sub(t03,t07); move32(); re[s*20] = L_sub(t00,t04); move32(); im[s*20] = L_sub(t01,t05); move32(); re[s*28] = L_add(t02,t06); move32(); im[s*28] = L_add(t03,t07); move32(); /* 6. FFT4 stage */ /* Pre-additions */ t00 = L_add(y10,y42); t02 = L_sub(y10,y42); t01 = L_add(y11,y43); t03 = L_sub(y11,y43); t04 = L_add(y26,y58); t07 = L_sub(y26,y58); t05 = L_add(y59,y27); t06 = L_sub(y59,y27); /* Post-additions */ re[s* 5] = L_add(t00,t04); move32(); im[s* 5] = L_add(t01,t05); move32(); re[s*13] = L_sub(t02,t06); move32(); im[s*13] = L_sub(t03,t07); move32(); re[s*21] = L_sub(t00,t04); move32(); im[s*21] = L_sub(t01,t05); move32(); re[s*29] = L_add(t02,t06); move32(); im[s*29] = L_add(t03,t07); move32(); /* 7. FFT4 stage */ /* Pre-additions */ t00 = L_add(y12,y44); t02 = L_sub(y12,y44); t01 = L_add(y13,y45); t03 = L_sub(y13,y45); t04 = L_add(y28,y60); t07 = L_sub(y28,y60); t05 = L_add(y61,y29); t06 = L_sub(y61,y29); /* Post-additions */ re[s* 6] = L_add(t00,t04); move32(); im[s* 6] = L_add(t01,t05); move32(); re[s*14] = L_sub(t02,t06); move32(); im[s*14] = L_sub(t03,t07); move32(); re[s*22] = L_sub(t00,t04); move32(); im[s*22] = L_sub(t01,t05); move32(); re[s*30] = L_add(t02,t06); move32(); im[s*30] = L_add(t03,t07); move32(); /* 8. FFT4 stage */ /* Pre-additions */ t00 = L_add(y14,y46); t02 = L_sub(y14,y46); t01 = L_add(y15,y47); t03 = L_sub(y15,y47); t04 = L_add(y30,y62); t07 = L_sub(y30,y62); t05 = L_add(y63,y31); t06 = L_sub(y63,y31); /* Post-additions */ re[s* 7] = L_add(t00,t04); move32(); im[s* 7] = L_add(t01,t05); move32(); re[s*15] = L_sub(t02,t06); move32(); im[s*15] = L_sub(t03,t07); move32(); re[s*23] = L_sub(t00,t04); move32(); im[s*23] = L_sub(t01,t05); move32(); re[s*31] = L_add(t02,t06); move32(); im[s*31] = L_add(t03,t07); move32(); } /** * \brief Combined FFT * * \param [i/o] re real part * \param [i/o] im imag part * \param [i ] W rotation factor * \param [i ] len length of fft * \param [i ] dim1 length of fft1 * \param [i ] dim2 length of fft2 * \param [i ] sx stride real and imag part * \param [i ] sc stride phase rotation coefficients * \param [tmp] x 32-bit workbuffer of length=2*len * \param [i ] Woff offset for addressing the rotation vector table * * \return void */ static void fftN2( Word32 *re, Word32 *im, const Word16 *W, Word16 len, Word16 dim1, Word16 dim2, Word16 sx, Word16 sc, Word32 *x, Word16 Woff ) { Word16 i,j; Word32 y[2*20]; assert( len == (dim1*dim2) ); assert( (dim1==3) || (dim1==5) || (dim1==8) || (dim1==10) || (dim1==15) || (dim1==16) || (dim1==20) || (dim1==30) || (dim1==32) ); assert( (dim2==4) || (dim2==8) || (dim2==10) || (dim2==12) || (dim2==16) || (dim2==20) ); FOR (i=0; i