/*==================================================================================== EVS Codec 3GPP TS26.443 Jun 30, 2015. Version CR 26.443-0006 ====================================================================================*/ #include #include #include "prot.h" #if defined __ICL #define restrict __restrict #else #define restrict #endif #ifdef _MSC_VER #pragma warning(disable : 4305) /* disable truncation from double to float warning (VC++)*/ #endif static void fft8(float *vec); static void fft10(float *vec); static void fft16(float *vec); static void fft20(float *vec); static void fft30(float *vec); static void fft5s(float *x, int stride); static const float INV_SQRT2 = 7.071067811865475e-1; static const float COS_PI_DIV8 = 9.238795325112867e-1; static const float COS_3PI_DIV8 = 3.826834323650898e-1; static const float SQRT2PLUS1 = 2.414213562373095; static const float SQRT2MINUS1 = 4.142135623730952e-1; #ifdef _MSC_VER #pragma warning(default : 4305) #endif /******************************************************************************* Functionname: fft8 ******************************************************************************* Description: 8-point FFT. Complex-valued input takes 52 real additions and 4 real multiplications. Arguments: vec - pointer to data (interleaved real / imaginary parts) Return: none *******************************************************************************/ static void fft8(float * restrict vec) { float temp1[16]; float temp2[16]; /* Pre-additions */ temp1[0] = vec[0] + vec[8]; temp1[2] = vec[0] - vec[8]; temp1[1] = vec[1] + vec[9]; temp1[3] = vec[1] - vec[9]; temp1[4] = vec[2] + vec[10]; temp1[6] = vec[2] - vec[10]; temp1[5] = vec[3] + vec[11]; temp1[7] = vec[3] - vec[11]; temp1[8] = vec[4] + vec[12]; temp1[10] = vec[4] - vec[12]; temp1[9] = vec[5] + vec[13]; temp1[11] = vec[5] - vec[13]; temp1[12] = vec[6] + vec[14]; temp1[14] = vec[6] - vec[14]; temp1[13] = vec[7] + vec[15]; temp1[15] = vec[7] - vec[15]; /* Pre-additions and core multiplications */ temp2[0] = temp1[0] + temp1[8]; temp2[4] = temp1[0] - temp1[8]; temp2[1] = temp1[1] + temp1[9]; temp2[5] = temp1[1] - temp1[9]; temp2[8] = temp1[2] - temp1[11]; temp2[10] = temp1[2] + temp1[11]; temp2[9] = temp1[3] + temp1[10]; temp2[11] = temp1[3] - temp1[10]; temp2[2] = temp1[4] + temp1[12]; temp2[7] = temp1[4] - temp1[12]; temp2[3] = temp1[5] + temp1[13]; temp2[6] = temp1[13]- temp1[5]; temp1[1] = temp1[6] + temp1[14]; temp1[2] = temp1[6] - temp1[14]; temp1[0] = temp1[7] + temp1[15]; temp1[3] = temp1[7] - temp1[15]; temp2[12] = (temp1[0] + temp1[2]) * INV_SQRT2; temp2[14] = (temp1[0] - temp1[2]) * INV_SQRT2; temp2[13] = (temp1[3] - temp1[1]) * INV_SQRT2; temp2[15] = (temp1[1] + temp1[3]) * -INV_SQRT2; /* Post-additions */ vec[0] = temp2[0] + temp2[2]; vec[8] = temp2[0] - temp2[2]; vec[1] = temp2[1] + temp2[3]; vec[9] = temp2[1] - temp2[3]; vec[4] = temp2[4] - temp2[6]; vec[12] = temp2[4] + temp2[6]; vec[5] = temp2[5] - temp2[7]; vec[13] = temp2[5] + temp2[7]; vec[6] = temp2[8] + temp2[14]; vec[14] = temp2[8] - temp2[14]; vec[7] = temp2[9] + temp2[15]; vec[15] = temp2[9] - temp2[15]; vec[2] = temp2[10]+ temp2[12]; vec[10] = temp2[10]- temp2[12]; vec[3] = temp2[11]+ temp2[13]; vec[11] = temp2[11]- temp2[13]; } /******************************************************************************* Functionname: fft16 ******************************************************************************* Description: 16-point FFT. Complex-valued input takes 144 real additions and 24 real multiplications. Arguments: vec - pointer to data (interleaved real / imaginary parts) Return: none *******************************************************************************/ /* fast implementation, completely unrolled and inlined */ static void fft16(float * restrict vec) { float temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18, temp19, temp110, temp111, temp112, temp113, temp114, temp115; float temp20, temp21, temp22, temp23, temp24, temp25, temp26, temp27, temp28, temp29, temp210, temp211, temp212, temp213, temp214, temp215; float vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; /* even */ vec0 = vec[0] + vec[16]; vec1 = vec[1] + vec[17]; vec2 = vec[2] + vec[18]; vec3 = vec[3] + vec[19]; vec4 = vec[4] + vec[20]; vec5 = vec[5] + vec[21]; vec6 = vec[6] + vec[22]; vec7 = vec[7] + vec[23]; vec8 = vec[8] + vec[24]; vec9 = vec[9] + vec[25]; vec10 = vec[10] + vec[26]; vec11 = vec[11] + vec[27]; vec12 = vec[12] + vec[28]; vec13 = vec[13] + vec[29]; vec14 = vec[14] + vec[30]; vec15 = vec[15] + vec[31]; /* Pre-additions */ temp10 = vec0 + vec8; temp12 = vec0 - vec8; temp11 = vec1 + vec9; temp13 = vec1 - vec9; temp14 = vec2 + vec10; temp16 = vec2 - vec10; temp15 = vec3 + vec11; temp17 = vec3 - vec11; temp18 = vec4 + vec12; temp110 = vec4 - vec12; temp19 = vec5 + vec13; temp111 = vec5 - vec13; temp112 = vec6 + vec14; temp114 = vec6 - vec14; temp113 = vec7 + vec15; temp115 = vec7 - vec15; /* Pre-additions and core multiplications */ temp20 = temp10 + temp18; temp24 = temp10 - temp18; temp21 = temp11 + temp19; temp25 = temp11 - temp19; temp28 = temp12 - temp111; temp210 = temp12 + temp111; temp29 = temp13 + temp110; temp211 = temp13 - temp110; temp22 = temp14 + temp112; temp27 = temp14 - temp112; temp23 = temp15 + temp113; temp26 = temp113- temp15; temp11 = temp16 + temp114; temp12 = temp16 - temp114; temp10 = temp17 + temp115; temp13 = temp17 - temp115; temp212 = (temp10 + temp12) * INV_SQRT2; temp214 = (temp10 - temp12) * INV_SQRT2; temp213 = (temp13 - temp11) * INV_SQRT2; temp215 = (temp11 + temp13) * -INV_SQRT2; /* odd */ vec0 = vec[0] - vec[16]; vec1 = vec[1] - vec[17]; vec2 = vec[2] - vec[18]; vec3 = vec[3] - vec[19]; vec4 = vec[4] - vec[20]; vec5 = vec[5] - vec[21]; vec6 = vec[6] - vec[22]; vec7 = vec[7] - vec[23]; vec8 = vec[8] - vec[24]; vec9 = vec[9] - vec[25]; vec10 = vec[10] - vec[26]; vec11 = vec[11] - vec[27]; vec12 = vec[12] - vec[28]; vec13 = vec[13] - vec[29]; vec14 = vec[14] - vec[30]; vec15 = vec[15] - vec[31]; /* Pre-additions and core multiplications */ temp19 = (vec2 + vec14) * -COS_3PI_DIV8; temp110 = (vec2 - vec14) * COS_PI_DIV8; temp18 = (vec3 + vec15) * COS_3PI_DIV8; temp111 = (vec3 - vec15) * COS_PI_DIV8; temp15 = (vec4 + vec12) * -INV_SQRT2; temp16 = (vec4 - vec12) * INV_SQRT2; temp14 = (vec5 + vec13) * INV_SQRT2; temp17 = (vec5 - vec13) * INV_SQRT2; temp113 = (vec6 + vec10) * -COS_PI_DIV8; temp114 = (vec6 - vec10) * COS_3PI_DIV8; temp112 = (vec7 + vec11) * COS_PI_DIV8; temp115 = (vec7 - vec11) * COS_3PI_DIV8; /* Core multiplications */ vec2 = temp18 * SQRT2PLUS1 - temp112 * SQRT2MINUS1; vec3 = temp19 * SQRT2PLUS1 - temp113 * SQRT2MINUS1; vec4 = temp110 * SQRT2MINUS1 - temp114 * SQRT2PLUS1; vec5 = temp111 * SQRT2MINUS1 - temp115 * SQRT2PLUS1; /* Post-additions */ temp18 += temp112; temp19 += temp113; temp110+= temp114; temp111+= temp115; vec6 = vec0 + temp14; vec10 = vec0 - temp14; vec7 = vec1 + temp15; vec11 = vec1 - temp15; vec12 = temp16 - vec9; vec14 = temp16 + vec9; vec13 = vec8 + temp17; vec15 = vec8 - temp17; temp10 = vec6 - vec14; temp12 = vec6 + vec14; temp11 = vec7 + vec15; temp13 = vec7 - vec15; temp14 = vec10 + vec12; temp16 = vec10 - vec12; temp15 = vec11 + vec13; temp17 = vec11 - vec13; vec10 = temp18 + temp110; temp110 = temp18 - temp110; vec11 = temp19 + temp111; temp111 = temp19 - temp111; temp112 = vec2 + vec4; temp114 = vec2 - vec4; temp113 = vec3 + vec5; temp115 = vec3 - vec5; /* Post-additions */ *vec++ = temp20 + temp22; *vec++ = temp21 + temp23; *vec++ = temp12 + vec10; *vec++ = temp13 + vec11; *vec++ = temp210+ temp212; *vec++ = temp211+ temp213; *vec++ = temp10 + temp112; *vec++ = temp11 + temp113; *vec++ = temp24 - temp26; *vec++ = temp25 - temp27; *vec++ = temp16 + temp114; *vec++ = temp17 + temp115; *vec++ = temp28 + temp214; *vec++ = temp29 + temp215; *vec++ = temp14 + temp110; *vec++ = temp15 + temp111; *vec++ = temp20 - temp22; *vec++ = temp21 - temp23; *vec++ = temp12 - vec10; *vec++ = temp13 - vec11; *vec++ = temp210- temp212; *vec++ = temp211- temp213; *vec++ = temp10 - temp112; *vec++ = temp11 - temp113; *vec++ = temp24 + temp26; *vec++ = temp25 + temp27; *vec++ = temp16 - temp114; *vec++ = temp17 - temp115; *vec++ = temp28 - temp214; *vec++ = temp29 - temp215; *vec++ = temp14 - temp110; *vec++ = temp15 - temp111; } /******************************************************************************* Functionname: fft15 ******************************************************************************* Description: 15-point FFT. Complex-valued input takes 176 real additions and 34 real multiplications. Arguments: vec - pointer to data (interleaved real / imaginary parts) Return: none *******************************************************************************/ static void fft15(float * restrict vec) { float r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17; float i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17; float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, tmp18, tmp19, tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27, tmp28, tmp29; /* Pre-additions real part */ r1 = vec[2] + vec[8]; r2 = vec[2] - vec[8]; r3 = vec[4] + vec[16]; r4 = vec[4] - vec[16]; r5 = vec[6] + vec[24]; r6 = vec[6] - vec[24]; r7 = vec[10] + vec[20]; r8 = vec[10] - vec[20]; r9 = vec[12] + vec[18]; r10 = vec[12] - vec[18]; r11 = vec[14] + vec[26]; r12 = vec[14] - vec[26]; r13 = vec[22] + vec[28]; r14 = vec[22] - vec[28]; tmp2 = r1 + r3; tmp4 = r1 - r3; tmp6 = r2 + r14; tmp8 = r2 - r14; tmp10 = r4 + r12; tmp12 = r4 - r12; tmp14 = r5 + r9; tmp16 = r5 - r9; tmp18 = r11 + r13; tmp20 = r11 - r13; /* Pre-additions imaginary part */ i1 = vec[3] + vec[9]; i2 = vec[3] - vec[9]; i3 = vec[5] + vec[17]; i4 = vec[5] - vec[17]; i5 = vec[7] + vec[25]; i6 = vec[7] - vec[25]; i7 = vec[11] + vec[21]; i8 = vec[11] - vec[21]; i9 = vec[13] + vec[19]; i10 = vec[13] - vec[19]; i11 = vec[15] + vec[27]; i12 = vec[15] - vec[27]; i13 = vec[23] + vec[29]; i14 = vec[23] - vec[29]; tmp3 = i1 + i3; tmp5 = i1 - i3; tmp7 = i2 + i14; tmp9 = i2 - i14; tmp11 = i4 + i12; tmp13 = i4 - i12; tmp15 = i5 + i9; tmp17 = i5 - i9; tmp19 = i11 + i13; tmp21 = i11 - i13; /* Pre-additions and core multiplications */ tmp28= tmp4 + tmp20; tmp29= tmp5 + tmp21; r4 = tmp2 + tmp18; i4 = tmp3 + tmp19; r3 = (r4 + tmp14) * -1.25f; i3 = (i4 + tmp15) * -1.25f; r2 = (tmp29 - i8) * -8.660254037844387e-1f; i2 = (tmp28 - r8) * 8.660254037844387e-1f; r1 = r4 + r7; i1 = i4 + i7; r0 = r1 + vec[0] + tmp14; i0 = i1 + vec[1] + tmp15; r7 = tmp4 - tmp20; i7 = tmp5 - tmp21; r8 = (tmp3 - tmp19) * -4.841229182759272e-1f; i8 = (tmp2 - tmp18) * 4.841229182759272e-1f; tmp0 = tmp6 + r10; tmp1 = tmp7 + i10; tmp2 = r6 - tmp10; tmp3 = i6 - tmp11; r10 = tmp7 * -2.308262652881440f; i10 = tmp6 * 2.308262652881440f; r11 = tmp8 * 1.332676064001459f; i11 = tmp9 * 1.332676064001459f; r6 = (r7 - tmp16) * 5.590169943749475e-1f; i6 = (i7 - tmp17) * 5.590169943749475e-1f; r12 = (tmp1 + tmp3) * 5.877852522924733e-1f; i12 = (tmp0 + tmp2) * -5.877852522924733e-1f; r13 = (tmp7 - tmp11) * -8.816778784387098e-1f; i13 = (tmp6 - tmp10) * 8.816778784387098e-1f; r14 = (tmp8 + tmp12) * 5.090369604551274e-1f; i14 = (tmp9 + tmp13) * 5.090369604551274e-1f; r16 = tmp11 * 5.449068960040204e-1f; i16 = tmp10 * -5.449068960040204e-1f; r17 = tmp12 * 3.146021430912046e-1f; i17 = tmp13 * 3.146021430912046e-1f; r4 *= 1.875f; i4 *= 1.875f; r1 *= -1.5f; i1 *= -1.5f; r7 *= -8.385254915624212e-1f; i7 *= -8.385254915624212e-1f; r5 = tmp29 * 1.082531754730548f; i5 = tmp28 * -1.082531754730548f; r9 = tmp1 * 1.538841768587627f; i9 = tmp0 * -1.538841768587627f; r15 = tmp3 * 3.632712640026803e-1f; i15 = tmp2 * -3.632712640026803e-1f; /* Post-additions real part */ tmp2 = r0 + r1; tmp4 = r3 + r6; tmp6 = r3 - r6; tmp8 = r4 + r5; tmp10 = r4 - r5; tmp12 = r7 + r8; tmp14 = r7 - r8; tmp16 = r13 + r16; tmp18 = r14 + r17; tmp20 = r10 - r13; tmp22 = r11 - r14; tmp24 = r12 + r15; tmp26 = r12 - r9; r1 = tmp2 + r2; r2 = tmp2 - r2; r3 = tmp4 + tmp26; r4 = tmp4 - tmp26; r5 = tmp6 + tmp24; r6 = tmp6 - tmp24; r7 = tmp16 + tmp18; r8 = tmp16 - tmp18; r9 = tmp20 - tmp22; r10 = tmp20 + tmp22; r11 = r1 + tmp8; r12 = r2 + tmp10; r13 = r11 - tmp12; r14 = r12 - tmp14; r15 = r12 + tmp14; r16 = r11 + tmp12; /* Post-additions imaginary part */ tmp3 = i0 + i1; tmp5 = i3 + i6; tmp7 = i3 - i6; tmp9 = i4 + i5; tmp11 = i4 - i5; tmp13 = i7 + i8; tmp15 = i7 - i8; tmp17 = i13 + i16; tmp19 = i14 + i17; tmp21 = i10 - i13; tmp23 = i11 - i14; tmp25 = i12 + i15; tmp27 = i12 - i9; i1 = tmp3 + i2; i2 = tmp3 - i2; i3 = tmp5 + tmp27; i4 = tmp5 - tmp27; i5 = tmp7 + tmp25; i6 = tmp7 - tmp25; i7 = tmp17 + tmp19; i8 = tmp17 - tmp19; i9 = tmp21 - tmp23; i10 = tmp21 + tmp23; i11 = i1 + tmp9; i12 = i2 + tmp11; i13 = i11 - tmp13; i14 = i12 - tmp15; i15 = i12 + tmp15; i16 = i11 + tmp13; *vec++ = r0; *vec++ = i0; *vec++ = r13 + r5 + r7; *vec++ = i13 + i5 + i7; *vec++ = r15 + r3 - r9; *vec++ = i15 + i3 - i9; *vec++ = r0 + r4; *vec++ = i0 + i4; *vec++ = r13 + r6 - r7; *vec++ = i13 + i6 - i7; *vec++ = r2; *vec++ = i2; *vec++ = r0 + r5; *vec++ = i0 + i5; *vec++ = r16 + r3 - r10; *vec++ = i16 + i3 - i10; *vec++ = r15 + r4 + r9; *vec++ = i15 + i4 + i9; *vec++ = r0 + r6; *vec++ = i0 + i6; *vec++ = r1; *vec++ = i1; *vec++ = r14 + r5 + r8; *vec++ = i14 + i5 + i8; *vec++ = r0 + r3; *vec++ = i0 + i3; *vec++ = r16 + r4 + r10; *vec++ = i16 + i4 + i10; *vec++ = r14 + r6 - r8; *vec++ = i14 + i6 - i8; } /******************************************************************************* Functionname: fft5s ******************************************************************************* Description: 5-point FFT. Arguments: x - pointer to input data (interleaved real / imaginary parts) stride - stride for input data Return: none *******************************************************************************/ static const float C51 = 0.9510565162951535f; static const float C52 = -1.5388417685876270f; static const float C53 = -0.3632712640026803f; static const float C54 = 0.5590169943749475f; static const float C55 = -1.25f; static void fft5s(float *x, int stride) { float r1,r2,r3,r4; float s1,s2,s3,s4; float t; /* real part */ r1 = x[1*stride] + x[4*stride]; r4 = x[1*stride] - x[4*stride]; r3 = x[2*stride] + x[3*stride]; r2 = x[2*stride] - x[3*stride]; t = (r1-r3) * C54; r1 = r1 + r3; x[0] = x[0] + r1; r1 = x[0] + (r1 * C55); r3 = r1 - t; r1 = r1 + t; t = (r4 + r2) * C51; r4 = t + (r4 * C52); r2 = t + (r2 * C53); /* imaginary part */ s1 = x[1*stride+1] + x[4*stride+1]; s4 = x[1*stride+1] - x[4*stride+1]; s3 = x[2*stride+1] + x[3*stride+1]; s2 = x[2*stride+1] - x[3*stride+1]; t = (s1 - s3) * C54; s1 = s1 + s3; x[1] = x[1] + s1; s1 = x[1] + (s1 * C55); s3 = s1 - t; s1 = s1 + t; t = (s4 + s2) * C51; s4 = t + (s4 * C52); s2 = t + (s2 * C53); /* combination */ x[1*stride] = r1 + s2; x[4*stride] = r1 - s2; x[2*stride] = r3 - s4; x[3*stride] = r3 + s4; x[1*stride+1] = s1 - r2; x[4*stride+1] = s1 + r2; x[2*stride+1] = s3 + r4; x[3*stride+1] = s3 - r4; } /** * \brief Function performs a complex 10-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR10 bits. * * WOPS FLC version: 1093 cycles * WOPS with 32x16 bit multiplications: 196 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ static void fft10(float * restrict vec) { float t; float r1,r2,r3,r4; float s1,s2,s3,s4; float y00,y01,y02,y03,y04,y05,y06,y07,y08,y09; float y10,y11,y12,y13,y14,y15,y16,y17,y18,y19; /* 2 fft5 stages */ /* real part */ r1 = vec[12] + vec[8]; r4 = vec[12] - vec[8]; r3 = vec[4] + vec[16]; r2 = vec[4] - vec[16]; t = (r1 - r3) * C54; r1 = r1 + r3; y00 = vec[0] + r1; r1 = y00 + (r1 * C55); r3 = r1 - t; r1 = r1 + t; t = (r4 + r2) * C51; r4 = t + (r4 * C52); r2 = t + (r2 * C53); /* imaginary part */ s1 = vec[13] + vec[9]; s4 = vec[13] - vec[9]; s3 = vec[5] + vec[17]; s2 = vec[5] - vec[17]; t = (s1 - s3) * C54; s1 = s1 + s3; y01 = vec[1] + s1; s1 = y01 + (s1 * C55); s3 = s1 - t; s1 = s1 + t; t = (s4 + s2) * C51; s4 = t + (s4 * C52); s2 = t + (s2 * C53); /* combination */ y04 = r1 + s2; y16 = r1 - s2; y08 = r3 - s4; y12 = r3 + s4; y05 = s1 - r2; y17 = s1 + r2; y09 = s3 + r4; y13 = s3 - r4; /* real part */ r1 = vec[2] + vec[18]; r4 = vec[2] - vec[18]; r3 = vec[14] + vec[6]; r2 = vec[14] - vec[6]; t = (r1 - r3) * C54; r1 = r1 + r3; y02 = vec[10] + r1; r1 = y02 + (r1 * C55); r3 = r1 - t; r1 = r1 + t; t = (r4 + r2) * C51; r4 = t + (r4 * C52); r2 = t + (r2 * C53); /* imaginary part */ s1 = vec[3] + vec[19]; s4 = vec[3] - vec[19]; s3 = vec[15] + vec[7]; s2 = vec[15] - vec[7]; t = (s1 - s3) * C54; s1 = s1 + s3; y03 = vec[11] + s1; s1 = y03 + (s1 * C55); s3 = s1 - t; s1 = s1 + t; t = (s4 + s2) * C51; s4 = t + (s4 * C52); s2 = t + (s2 * C53); /* combination */ y06 = r1 + s2; y18 = r1 - s2; y10 = r3 - s4; y14 = r3 + s4; y07 = s1 - r2; y19 = s1 + r2; y11 = s3 + r4; y15 = s3 - r4; /* 5 fft2 stages */ vec[0] = y00 + y02; vec[1] = y01 + y03; vec[2] = y12 - y14; vec[3] = y13 - y15; vec[4] = y04 + y06; vec[5] = y05 + y07; vec[6] = y16 - y18; vec[7] = y17 - y19; vec[8] = y08 + y10; vec[9] = y09 + y11; vec[10] = y00 - y02; vec[11] = y01 - y03; vec[12] = y12 + y14; vec[13] = y13 + y15; vec[14] = y04 - y06; vec[15] = y05 - y07; vec[16] = y16 + y18; vec[17] = y17 + y19; vec[18] = y08 - y10; vec[19] = y09 - y11; } /** * \brief Function performs a complex 20-point FFT * The FFT is performed inplace. The result of the FFT * is scaled by SCALEFACTOR20 bits. * * WOPS FLC version: 1509 cycles * WOPS with 32x16 bit multiplications: 432 cycles * * \param [i/o] re real input / output * \param [i/o] im imag input / output * \param [i ] s stride real and imag input / output * * \return void */ static void fft20(float *signal) { const int s = 2; float *re = signal, *im = signal+1; float r1,r2,r3,r4; float s1,s2,s3,s4; float x0,x1,x2,x3,x4; float t,t0,t1,t2,t3,t4,t5,t6,t7; float y00,y01,y02,y03,y04,y05,y06,y07,y08,y09; float y10,y11,y12,y13,y14,y15,y16,y17,y18,y19; float y20,y21,y22,y23,y24,y25,y26,y27,y28,y29; float y30,y31,y32,y33,y34,y35,y36,y37,y38,y39; /* */ /* 1. FFT5 stage */ /* real part */ x0 = re[s* 0]; x1 = re[s*16]; x2 = re[s*12]; x3 = re[s* 8]; x4 = re[s* 4]; r1 = x1 + x4; r4 = x1 - x4; r3 = x2 + x3; r2 = x2 - x3; t = (r1 - r3) * C54; r1 = r1 + r3; y00 = x0 + r1; r1 = y00 + (r1 * C55); r3 = r1 - t; r1 = r1 + t; t = (r4 + r2) * C51; r4 = t + (r4 * C52); r2 = t + (r2 * C53); /* imaginary part */ x0 = im[s* 0]; x1 = im[s*16]; x2 = im[s*12]; x3 = im[s* 8]; x4 = im[s* 4]; s1 = x1 + x4; s4 = x1 - x4; s3 = x2 + x3; s2 = x2 - x3; t = (s1 - s3) * C54; s1 = (s1 + s3); y01 = (x0 + s1); s1 = y01 + (s1 * C55); s3 = (s1 - t); s1 = (s1 + t); t = (s4 + s2) * C51; s4 = t + (s4 * C52); s2 = t + (s2 * C53); /* combination */ y08 = (r1 + s2); y32 = (r1 - s2); y16 = (r3 - s4); y24 = (r3 + s4); y09 = (s1 - r2); y33 = (s1 + r2); y17 = (s3 + r4); y25 = (s3 - r4); /* 2. FFT5 stage */ /* real part */ x0 = re[s* 5]; x1 = re[s* 1]; x2 = re[s*17]; x3 = re[s*13]; x4 = re[s* 9]; r1 = (x1 + x4); r4 = (x1 - x4); r3 = (x2 + x3); r2 = (x2 - x3); t = (r1 - r3) * C54; r1 = (r1 + r3); y02 = (x0 + r1); r1 = y02 + (r1 * C55); r3 = (r1 - t); r1 = (r1 + t); t = (r4 + r2) * C51; r4 = t + (r4 * C52); r2 = t + (r2 * C53); /* imaginary part */ x0 = im[s* 5]; x1 = im[s* 1]; x2 = im[s*17]; x3 = im[s*13]; x4 = im[s* 9]; s1 = (x1 + x4); s4 = (x1 - x4); s3 = (x2 + x3); s2 = (x2 - x3); t = (s1 - s3) * C54; s1 = (s1 + s3); y03 = (x0 + s1); s1 = y03 + (s1 * C55); s3 = (s1 - t); s1 = (s1 + t); t = (s4 + s2) * C51; s4 = t + (s4 * C52); s2 = t + (s2 * C53); /* combination */ y10 = (r1 + s2); y34 = (r1 - s2); y18 = (r3 - s4); y26 = (r3 + s4); y11 = (s1 - r2); y35 = (s1 + r2); y19 = (s3 + r4); y27 = (s3 - r4); /* 3. FFT5 stage */ /* real part */ x0 = re[s*10]; x1 = re[s* 6]; x2 = re[s* 2]; x3 = re[s*18]; x4 = re[s*14]; r1 = (x1 + x4); r4 = (x1 - x4); r3 = (x2 + x3); r2 = (x2 - x3); t = (r1 - r3) * C54; r1 = (r1 + r3); y04 = (x0 + r1); r1 = y04 + (r1 * C55); r3 = (r1 - t); r1 = (r1 + t); t = (r4 + r2) * C51; r4 = t + (r4 * C52); r2 = t + (r2 * C53); /* imaginary part */ x0 = im[s*10]; x1 = im[s* 6]; x2 = im[s* 2]; x3 = im[s*18]; x4 = im[s*14]; s1 = (x1 + x4); s4 = (x1 - x4); s3 = (x2 + x3); s2 = (x2 - x3); t = (s1 - s3) * C54; s1 = (s1 + s3); y05 = (x0 + s1); s1 = y05 + (s1 * C55); s3 = (s1 - t); s1 = (s1 + t); t = (s4 + s2) * C51; s4 = t + (s4 * C52); s2 = t + (s2 * C53); /* combination */ y12 = (r1 + s2); y36 = (r1 - s2); y20 = (r3 - s4); y28 = (r3 + s4); y13 = (s1 - r2); y37 = (s1 + r2); y21 = (s3 + r4); y29 = (s3 - r4); /* 4. FFT5 stage */ /* real part */ x0 = re[s*15]; x1 = re[s*11]; x2 = re[s* 7]; x3 = re[s* 3]; x4 = re[s*19]; r1 = (x1 + x4); r4 = (x1 - x4); r3 = (x2 + x3); r2 = (x2 - x3); t = (r1 - r3) * C54; r1 = (r1 + r3); y06 = (x0 + r1); r1 = y06 + (r1 * C55); r3 = (r1 - t); r1 = (r1 + t); t = (r4 + r2) * C51; r4 = t + (r4 * C52); r2 = t + (r2 * C53); /* imaginary part */ x0 = im[s*15]; x1 = im[s*11]; x2 = im[s* 7]; x3 = im[s* 3]; x4 = im[s*19]; s1 = (x1 + x4); s4 = (x1 - x4); s3 = (x2 + x3); s2 = (x2 - x3); t = (s1 - s3) * C54; s1 = (s1 + s3); y07 = (x0 + s1); s1 = y07 + (s1 * C55); s3 = (s1 - t); s1 = (s1 + t); t = (s4 + s2) * C51; s4 = t + (s4 * C52); s2 = t + (s2 * C53); /* combination */ y14 = (r1 + s2); y38 = (r1 - s2); y22 = (r3 - s4); y30 = (r3 + s4); y15 = (s1 - r2); y39 = (s1 + r2); y23 = (s3 + r4); y31 = (s3 - r4); /* 1. FFT4 stage */ /* Pre-additions */ t0 = (y00 + y04); t2 = (y00 - y04); t1 = (y01 + y05); t3 = (y01 - y05); t4 = (y02 + y06); t7 = (y02 - y06); t5 = (y07 + y03); t6 = (y07 - y03); /* Post-additions */ re[s* 0] = (t0 + t4); im[s* 0] = (t1 + t5); re[s* 5] = (t2 - t6); im[s* 5] = (t3 - t7); re[s*10] = (t0 - t4); im[s*10] = (t1 - t5); re[s*15] = (t2 + t6); im[s*15] = (t3 + t7); /* 2. FFT4 stage */ /* Pre-additions */ t0 = (y08 + y12); t2 = (y08 - y12); t1 = (y09 + y13); t3 = (y09 - y13); t4 = (y10 + y14); t7 = (y10 - y14); t5 = (y15 + y11); t6 = (y15 - y11); /* Post-additions */ re[s* 4] = (t0 + t4); im[s* 4] = (t1 + t5); re[s* 9] = (t2 - t6); im[s* 9] = (t3 - t7); re[s*14] = (t0 - t4); im[s*14] = (t1 - t5); re[s*19] = (t2 + t6); im[s*19] = (t3 + t7); /* 3. FFT4 stage */ /* Pre-additions */ t0 = (y16 + y20); t2 = (y16 - y20); t1 = (y17 + y21); t3 = (y17 - y21); t4 = (y18 + y22); t7 = (y18 - y22); t5 = (y23 + y19); t6 = (y23 - y19); /* Post-additions */ re[s* 8] = (t0 + t4); im[s* 8] = (t1 + t5); re[s*13] = (t2 - t6); im[s*13] = (t3 - t7); re[s*18] = (t0 - t4); im[s*18] = (t1 - t5); re[s* 3] = (t2 + t6); im[s* 3] = (t3 + t7); /* 4. FFT4 stage */ /* Pre-additions */ t0 = (y24 + y28); t2 = (y24 - y28); t1 = (y25 + y29); t3 = (y25 - y29); t4 = (y26 + y30); t7 = (y26 - y30); t5 = (y31 + y27); t6 = (y31 - y27); /* Post-additions */ re[s*12] = (t0 + t4); im[s*12] = (t1 + t5); re[s*17] = (t2 - t6); im[s*17] = (t3 - t7); re[s* 2] = (t0 - t4); im[s* 2] = (t1 - t5); re[s* 7] = (t2 + t6); im[s* 7] = (t3 + t7); /* 5. FFT4 stage */ /* Pre-additions */ t0 = (y32 + y36); t2 = (y32 - y36); t1 = (y33 + y37); t3 = (y33 - y37); t4 = (y34 + y38); t7 = (y34 - y38); t5 = (y39 + y35); t6 = (y39 - y35); /* Post-additions */ re[s*16] = (t0 + t4); im[s*16] = (t1 + t5); re[s* 1] = (t2 - t6); im[s* 1] = (t3 - t7); re[s* 6] = (t0 - t4); im[s* 6] = (t1 - t5); re[s*11] = (t2 + t6); im[s*11] = (t3 + t7); /* */ /* */ } /******************************************************************************* Functionname: fft30 ******************************************************************************* Description: 30-point FFT. Arguments: in - pointer to data (interleaved real / imaginary parts) Return: none *******************************************************************************/ static void fft30(float * restrict in) { int i; float temp[60]; float * temp_l = temp; float * temp_lu = temp + 2*8; float * temp_h = temp + 2*15; float * temp_hu = temp + 2*15 + 2*8; float *in_l = in + 2*0; float *in_h = in + 2*15; for(i=0; i<7; i++) { *temp_l++ = *in_l++; *temp_l++ = *in_l++; *temp_h++ = *in_h++; *temp_h++ = *in_h++; *temp_l++ = *in_h++; *temp_l++ = *in_h++; *temp_h++ = *in_l++; *temp_h++ = *in_l++; } *temp_l++ = *in_l++; *temp_l++ = *in_l++; *temp_h++ = *in_h++; *temp_h++ = *in_h++; temp_l = temp; temp_h = temp + 30; fft15(temp_l); fft15(temp_h); in_l = in + 2*0; in_h = in + 2*15; for(i=0; i<7; i++) { *in_l++ = *temp_l + *temp_h; *in_h++ = *temp_l++ - *temp_h++; *in_l++ = *temp_l + *temp_h; *in_h++ = *temp_l++ - *temp_h++; *in_h++ = *temp_lu + *temp_hu; *in_l++ = *temp_lu++ - *temp_hu++; *in_h++ = *temp_lu + *temp_hu; *in_l++ = *temp_lu++ - *temp_hu++; } *in_l++ = *temp_l + *temp_h; *in_h++ = *temp_l++ - *temp_h++; *in_l++ = *temp_l + *temp_h; *in_h++ = *temp_l++ - *temp_h++; } /*-------------------------------------------------------------------* * fft_cldfb() * * Interface functions FFT subroutines *--------------------------------------------------------------------*/ void fft_cldfb ( float *data, /* i/o: input/output vector */ int size /* size of fft operation */ ) { switch(size) { case 5: fft5s(data,2); break; case 8: fft8(data); break; case 10: fft10(data); break; case 16: fft16(data); break; case 20: fft20(data); break; case 30: fft30(data); break; default: assert(0); break; } }