#include "config.h"
#include "ssc_guts.h"
#include "ssc_macro.h"

#if 1	// modify yang
#ifndef VC_PROJ
#include <xtensa/tie/xt_hifi2.h>
extern ae_int64 AE_MUL32_HH(ae_int32x2 d0, ae_int32x2 d1);
extern void AE_MULA32_HH(ae_int64 d /*inout*/, ae_int32x2 d0, ae_int32x2 d1);
extern void AE_MULS32_HH(ae_int64 d /*inout*/, ae_int32x2 d0, ae_int32x2 d1);
#endif



#endif



/*
 * Twiddle-factor table shared by the butterfly routines in this file.
 *
 * Layout: 324 interleaved (first, second) pairs = 648 entries.  The values of
 * pair k match (cos(2*pi*k/432), -sin(2*pi*k/432)) for k = 0..323, i.e. the
 * table covers 0..270 degrees in three runs of 108 pairs each.
 *
 * Two builds of the same table are provided:
 *   - default:          16-bit entries, full scale 32767; the butterflies
 *                       shift their products right by 15.
 *   - FFT_23BIT_TABLE:  32-bit entries, full scale 4194303 (2^22 - 1); the
 *                       butterflies shift their products right by 22.
 */
#ifndef FFT_23BIT_TABLE
static const short fft_twiddles44100_864[648] = {
	/* pairs k = 0..107 */
	32767, 0, 32765, -477, 32754, -953, 32737, -1429, 32713, -1905, 32681, -2381, 32643, -2856, 
	32598, -3330, 32546, -3804, 32488, -4277, 32422, -4749, 32350, -5220, 32270, -5690, 32184, 
	-6159, 32091, -6626, 31991, -7092, 31885, -7557, 31771, -8020, 31651, -8481, 31525, -8940, 
	31391, -9398, 31251, -9854, 31105, -10307, 30952, -10758, 30792, -11207, 30626, -11654, 30453, 
	-12098, 30274, -12540, 30088, -12979, 29896, -13415, 29698, -13848, 29493, -14279, 29283, 
	-14706, 29066, -15131, 28842, -15552, 28613, -15970, 28378, -16384, 28137, -16795, 27889, 
	-17202, 27636, -17606, 27377, -18006, 27112, -18403, 26842, -18795, 26566, -19183, 26284, 
	-19568, 25997, -19948, 25704, -20324, 25405, -20696, 25102, -21063, 24793, -21426, 24479, 
	-21784, 24159, -22138, 23835, -22487, 23505, -22831, 23170, -23170, 22831, -23505, 22487, 
	-23835, 22138, -24159, 21784, -24479, 21426, -24793, 21063, -25102, 20696, -25405, 20324, 
	-25704, 19948, -25997, 19568, -26284, 19183, -26566, 18795, -26842, 18403, -27112, 18006, 
	-27377, 17606, -27636, 17202, -27889, 16795, -28137, 16384, -28378, 15970, -28613, 15552, 
	-28842, 15131, -29066, 14706, -29283, 14279, -29493, 13848, -29698, 13415, -29896, 12979, 
	-30088, 12540, -30274, 12098, -30453, 11654, -30626, 11207, -30792, 10758, -30952, 10307, 
	-31105, 9854, -31251, 9398, -31391, 8940, -31525, 8481, -31651, 8020, -31771, 7557, -31885, 
	7092, -31991, 6626, -32091, 6159, -32184, 5690, -32270, 5220, -32350, 4749, -32422, 4277, 
	-32488, 3804, -32546, 3330, -32598, 2856, -32643, 2381, -32681, 1905, -32713, 1429, -32737, 
	953, -32754, 477, -32765, 

	/* pairs k = 108..215 */
	0, -32767, -477, -32765, -953, -32754, -1429, -32737, -1905, -32713, 
	-2381, -32681, -2856, -32643, -3330, -32598, -3804, -32546, -4277, -32488, -4749, -32422, -5220, 
	-32350, -5690, -32270, -6159, -32184, -6626, -32091, -7092, -31991, -7557, -31885, -8020, -31771, 
	-8481, -31651, -8940, -31525, -9398, -31391, -9854, -31251, -10307, -31105, -10758, -30952, -11207, 
	-30792, -11654, -30626, -12098, -30453, -12540, -30274, -12979, -30088, -13415, -29896, -13848, 
	-29698, -14279, -29493, -14706, -29283, -15131, -29066, -15552, -28842, -15970, -28613, -16384, 
	-28378, -16795, -28137, -17202, -27889, -17606, -27636, -18006, -27377, -18403, -27112, -18795, 
	-26842, -19183, -26566, -19568, -26284, -19948, -25997, -20324, -25704, -20696, -25405, -21063, 
	-25102, -21426, -24793, -21784, -24479, -22138, -24159, -22487, -23835, -22831, -23505, -23170, 
	-23170, -23505, -22831, -23835, -22487, -24159, -22138, -24479, -21784, -24793, -21426, -25102, 
	-21063, -25405, -20696, -25704, -20324, -25997, -19948, -26284, -19568, -26566, -19183, -26842, 
	-18795, -27112, -18403, -27377, -18006, -27636, -17606, -27889, -17202, -28137, -16795, -28378, 
	-16384, -28613, -15970, -28842, -15552, -29066, -15131, -29283, -14706, -29493, -14279, -29698, 
	-13848, -29896, -13415, -30088, -12979, -30274, -12540, -30453, -12098, -30626, -11654, -30792, 
	-11207, -30952, -10758, -31105, -10307, -31251, -9854, -31391, -9398, -31525, -8940, -31651, 
	-8481, -31771, -8020, -31885, -7557, -31991, -7092, -32091, -6626, -32184, -6159, -32270, -5690, 
	-32350, -5220, -32422, -4749, -32488, -4277, -32546, -3804, -32598, -3330, -32643, -2856, -32681, 
	-2381, -32713, -1905, -32737, -1429, -32754, -953, -32765, -477, 

	/* pairs k = 216..323 */
	-32767, 0, -32765, 477, -32754, 
	953, -32737, 1429, -32713, 1905, -32681, 2381, -32643, 2856, -32598, 3330, -32546, 3804, -32488, 
	4277, -32422, 4749, -32350, 5220, -32270, 5690, -32184, 6159, -32091, 6626, -31991, 7092, -31885, 
	7557, -31771, 8020, -31651, 8481, -31525, 8940, -31391, 9398, -31251, 9854, -31105, 10307, -30952, 
	10758, -30792, 11207, -30626, 11654, -30453, 12098, -30274, 12540, -30088, 12979, -29896, 13415, 
	-29698, 13848, -29493, 14279, -29283, 14706, -29066, 15131, -28842, 15552, -28613, 15970, -28378, 
	16384, -28137, 16795, -27889, 17202, -27636, 17606, -27377, 18006, -27112, 18403, -26842, 18795, 
	-26566, 19183, -26284, 19568, -25997, 19948, -25704, 20324, -25405, 20696, -25102, 21063, -24793, 
	21426, -24479, 21784, -24159, 22138, -23835, 22487, -23505, 22831, -23170, 23170, -22831, 23505, 
	-22487, 23835, -22138, 24159, -21784, 24479, -21426, 24793, -21063, 25102, -20696, 25405, -20324, 
	25704, -19948, 25997, -19568, 26284, -19183, 26566, -18795, 26842, -18403, 27112, -18006, 27377, 
	-17606, 27636, -17202, 27889, -16795, 28137, -16384, 28378, -15970, 28613, -15552, 28842, -15131, 
	29066, -14706, 29283, -14279, 29493, -13848, 29698, -13415, 29896, -12979, 30088, -12540, 30274, 
	-12098, 30453, -11654, 30626, -11207, 30792, -10758, 30952, -10307, 31105, -9854, 31251, -9398, 
	31391, -8940, 31525, -8481, 31651, -8020, 31771, -7557, 31885, -7092, 31991, -6626, 32091, -6159, 
	32184, -5690, 32270, -5220, 32350, -4749, 32422, -4277, 32488, -3804, 32546, -3330, 32598, -2856, 
	32643, -2381, 32681, -1905, 32713, -1429, 32737, -953, 32754, -477, 32765, 
};
#else
static const int fft_twiddles44100_864[648] = {

	/* pairs k = 0..107 */
	4194303 ,0,4193859 ,-61002,4192530 ,-121991,4190311 ,-182951,4187207 ,-243878,4183218 ,-304750,4178345 ,-365559,
	4172586 ,-426288,4165942 ,-486929,4158422 ,-547466,4150021 ,-607889,4140739 ,-668182,4130584 ,-728333,4119553 ,-788332,
	4107654 ,-848164,4094882 ,-907815,4081247 ,-967274,4066747 ,-1026527,4051387 ,-1085566,4035168 ,-1144374,4018101 ,-1202939,
	4000179 ,-1261252,3981414 ,-1319297,3961805 ,-1377061,3941358 ,-1434536,3920076 ,-1491708,3897968 ,-1548562,3875034 ,-1605089,
	3851277 ,-1661280,3826707 ,-1717115,3801331 ,-1772588,3775146 ,-1827689,3748168 ,-1882399,3720394 ,-1936716,3691831 ,-1990617,
	3662491 ,-2044102,3632372 ,-2097152,3601489 ,-2149761,3569843 ,-2201913,3537438 ,-2253600,3504291 ,-2304808,3470396 ,-2355530,
	3435773 ,-2405752,3400419 ,-2455467,3364348 ,-2504666,3327564 ,-2553329,3290079 ,-2601454,3251894 ,-2649030,3213021 ,-2696048,
	3173473 ,-2742492,3133250 ,-2788357,3092364 ,-2833630,3050828 ,-2878308,3008641 ,-2922373,2965822 ,-2965822,2922373 ,-3008641,
	2878308 ,-3050828,2833630 ,-3092364,2788357 ,-3133250,2742492 ,-3173473,2696048 ,-3213021,2649030 ,-3251894,2601454 ,-3290079,
	2553329 ,-3327564,2504666 ,-3364348,2455467 ,-3400419,2405752 ,-3435773,2355530 ,-3470396,2304808 ,-3504291,2253600 ,-3537438,
	2201913 ,-3569843,2149761 ,-3601489,2097152 ,-3632372,2044102 ,-3662491,1990617 ,-3691831,1936716 ,-3720394,1882399 ,-3748168,
	1827689 ,-3775146,1772588 ,-3801331,1717115 ,-3826707,1661280 ,-3851277,1605089 ,-3875034,1548562 ,-3897968,1491708 ,-3920076,
	1434536 ,-3941358,1377061 ,-3961805,1319297 ,-3981414,1261252 ,-4000179,1202939 ,-4018101,1144374 ,-4035168,1085566 ,-4051387,
	1026527 ,-4066747,967274 ,-4081247,907815 ,-4094882,848164 ,-4107654,788332 ,-4119553,728333 ,-4130584,668182 ,-4140739,
	607889 ,-4150021,547466 ,-4158422,486929 ,-4165942,426288 ,-4172586,365559 ,-4178345,304750 ,-4183218,243878 ,-4187207,
	182951 ,-4190311,121991 ,-4192530,61002 ,-4193859,


	/* pairs k = 108..215 */
	0 ,-4194303,-61002 ,-4193859,-121991 ,-4192530,-182951 ,-4190311,-243878 ,-4187207,-304750 ,-4183218,-365559 ,-4178345,
	-426288 ,-4172586,-486929 ,-4165942,-547466 ,-4158422,-607889 ,-4150021,-668182 ,-4140739,-728333 ,-4130584,-788332 ,-4119553,
	-848164 ,-4107654,-907815 ,-4094882,-967274 ,-4081247,-1026527 ,-4066747,-1085566 ,-4051387,-1144374 ,-4035168,-1202939 ,-4018101,
	-1261252 ,-4000179,-1319297 ,-3981414,-1377061 ,-3961805,-1434536 ,-3941358,-1491708 ,-3920076,-1548562 ,-3897968,-1605089 ,-3875034,
	-1661280 ,-3851277,-1717115 ,-3826707,-1772588 ,-3801331,-1827689 ,-3775146,-1882399 ,-3748168,-1936716 ,-3720394,-1990617 ,-3691831,
	-2044102 ,-3662491,-2097152 ,-3632372,-2149761 ,-3601489,-2201913 ,-3569843,-2253600 ,-3537438,-2304808 ,-3504291,-2355530 ,-3470396,
	-2405752 ,-3435773,-2455467 ,-3400419,-2504666 ,-3364348,-2553329 ,-3327564,-2601454 ,-3290079,-2649030 ,-3251894,-2696048 ,-3213021,
	-2742492 ,-3173473,-2788357 ,-3133250,-2833630 ,-3092364,-2878308 ,-3050828,-2922373 ,-3008641,-2965822 ,-2965822,-3008641 ,-2922373,
	-3050828 ,-2878308,-3092364 ,-2833630,-3133250 ,-2788357,-3173473 ,-2742492,-3213021 ,-2696048,-3251894 ,-2649030,-3290079 ,-2601454,
	-3327564 ,-2553329,-3364348 ,-2504666,-3400419 ,-2455467,-3435773 ,-2405752,-3470396 ,-2355530,-3504291 ,-2304808,-3537438 ,-2253600,
	-3569843 ,-2201913,-3601489 ,-2149761,-3632372 ,-2097152,-3662491 ,-2044102,-3691831 ,-1990617,-3720394 ,-1936716,-3748168 ,-1882399,
	-3775146 ,-1827689,-3801331 ,-1772588,-3826707 ,-1717115,-3851277 ,-1661280,-3875034 ,-1605089,-3897968 ,-1548562,-3920076 ,-1491708,
	-3941358 ,-1434536,-3961805 ,-1377061,-3981414 ,-1319297,-4000179 ,-1261252,-4018101 ,-1202939,-4035168 ,-1144374,-4051387 ,-1085566,
	-4066747 ,-1026527,-4081247 ,-967274,-4094882 ,-907815,-4107654 ,-848164,-4119553 ,-788332,-4130584 ,-728333,-4140739 ,-668182,
	-4150021 ,-607889,-4158422 ,-547466,-4165942 ,-486929,-4172586 ,-426288,-4178345 ,-365559,-4183218 ,-304750,-4187207 ,-243878,
	-4190311 ,-182951,-4192530 ,-121991,-4193859 ,-61002,


	/* pairs k = 216..323 */
	-4194303 ,0,-4193859 ,61002,-4192530 ,121991,-4190311 ,182951,-4187207 ,243878,-4183218 ,304750,-4178345 ,365559,-4172586 ,426288,
	-4165942 ,486929,-4158422 ,547466,-4150021 ,607889,-4140739 ,668182,-4130584 ,728333,-4119553 ,788332,-4107654 ,848164,-4094882 ,907815,
	-4081247 ,967274,-4066747 ,1026527,-4051387 ,1085566,-4035168 ,1144374,-4018101 ,1202939,-4000179 ,1261252,-3981414 ,1319297,-3961805 ,1377061,
	-3941358 ,1434536,-3920076 ,1491708,-3897968 ,1548562,-3875034 ,1605089,-3851277 ,1661280,-3826707 ,1717115,-3801331 ,1772588,-3775146 ,1827689,
	-3748168 ,1882399,-3720394 ,1936716,-3691831 ,1990617,-3662491 ,2044102,-3632372 ,2097152,-3601489 ,2149761,-3569843 ,2201913,-3537438 ,2253600,
	-3504291 ,2304808,-3470396 ,2355530,-3435773 ,2405752,-3400419 ,2455467,-3364348 ,2504666,-3327564 ,2553329,-3290079 ,2601454,
	-3251894 ,2649030,-3213021 ,2696048,-3173473 ,2742492,-3133250 ,2788357,-3092364 ,2833630,-3050828 ,2878308,-3008641 ,2922373,
	-2965822 ,2965822,-2922373 ,3008641,-2878308 ,3050828,-2833630 ,3092364,-2788357 ,3133250,-2742492 ,3173473,-2696048 ,3213021,
	-2649030 ,3251894,-2601454 ,3290079,-2553329 ,3327564,-2504666 ,3364348,-2455467 ,3400419,-2405752 ,3435773,-2355530 ,3470396,
	-2304808 ,3504291,-2253600 ,3537438,-2201913 ,3569843,-2149761 ,3601489,-2097152 ,3632372,-2044102 ,3662491,-1990617 ,3691831,
	-1936716 ,3720394,-1882399 ,3748168,-1827689 ,3775146,-1772588 ,3801331,-1717115 ,3826707,-1661280 ,3851277,-1605089 ,3875034,
	-1548562 ,3897968,-1491708 ,3920076,-1434536 ,3941358,-1377061 ,3961805,-1319297 ,3981414,-1261252 ,4000179,-1202939 ,4018101,
	-1144374 ,4035168,-1085566 ,4051387,-1026527 ,4066747,-967274 ,4081247,-907815 ,4094882,-848164 ,4107654,-788332 ,4119553,
	-728333 ,4130584,-668182 ,4140739,-607889 ,4150021,-547466 ,4158422,-486929 ,4165942,-426288 ,4172586,-365559 ,4178345,
	-304750 ,4183218,-243878 ,4187207,-182951 ,4190311,-121991 ,4192530,-61002 ,4193859,

};
#endif


/*
 * Build-configuration helpers private to ki_bfly2() (undefined again below).
 *
 * BFLY2_TW_T / BFLY2_TW_SHIFT follow the twiddle table in use: 16-bit entries
 * with a right shift of 15 by default, 32-bit entries with a right shift of
 * 22 when FFT_23BIT_TABLE is defined.
 *
 * BFLY2_ROTATE(acc, re_out, im_out, re_in, im_in, tw) computes, through the
 * 64-bit accumulator acc,
 *     re_out = (re_in*tw[0] + im_in*tw[1]) >> BFLY2_TW_SHIFT
 *     im_out = (im_in*tw[0] - re_in*tw[1]) >> BFLY2_TW_SHIFT
 * The inputs are read twice, so the outputs must not alias them.
 */
#ifndef FFT_23BIT_TABLE
#define BFLY2_TW_T	short
#define BFLY2_TW_SHIFT	15
#else
#define BFLY2_TW_T	int
#define BFLY2_TW_SHIFT	22
#endif

#ifdef VC_PROJ
/* Portable build: plain 64-bit integer arithmetic. */
#define BFLY2_ACC_T	long long
#define BFLY2_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = ((long long)(re_in))*((tw)[0]); \
		acc = acc + (((long long)(im_in))*((tw)[1])); \
		(re_out) = (int)(acc>>BFLY2_TW_SHIFT); \
		acc = ((long long)(im_in))*((tw)[0]); \
		acc = acc - (((long long)(re_in))*((tw)[1])); \
		(im_out) = (int)(acc>>BFLY2_TW_SHIFT); \
	} while (0)
#else
/* Xtensa build: same sequence expressed with the HiFi2 intrinsics. */
#define BFLY2_ACC_T	ae_int64
#define BFLY2_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = AE_MUL32_HH((re_in), (tw)[0]); \
		AE_MULA32_HH(acc, (im_in), (tw)[1]); \
		(re_out) = (int)((long long)AE_SRAI64(acc, BFLY2_TW_SHIFT)); \
		acc = AE_MUL32_HH((im_in), (tw)[0]); \
		AE_MULS32_HH(acc, (re_in), (tw)[1]); \
		(im_out) = (int)((long long)AE_SRAI64(acc, BFLY2_TW_SHIFT)); \
	} while (0)
#endif

/*
 * Radix-2 butterfly pass, in place, over 27 columns.
 *
 * Fout holds interleaved (re, im) ints.  Column k pairs the complex value at
 * Fout[2k] with the one 54 ints (27 complex values) further on.  The second
 * value is rotated by the twiddle pair at table offset 16*k, then
 *     Fout[2k + 54] = Fout[2k] - rotated
 *     Fout[2k]      = Fout[2k] + rotated
 */
static void ki_bfly2(int * Fout)
{
	const BFLY2_TW_T *tw = fft_twiddles44100_864;
	int *upper = Fout + 54;		/* partner half: 27 complex values ahead */
	BFLY2_ACC_T acc;
	int rot_re, rot_im;
	int col;

	for (col = 0; col < 27; col++) {
		BFLY2_ROTATE(acc, rot_re, rot_im, upper[0], upper[1], tw);
		tw += 16;

		upper[0] = Fout[0] - rot_re;
		upper[1] = Fout[1] - rot_im;
		Fout[0] = Fout[0] + rot_re;
		Fout[1] = Fout[1] + rot_im;

		upper += 2;
		Fout += 2;
	}
}

#undef BFLY2_ROTATE
#undef BFLY2_ACC_T
#undef BFLY2_TW_SHIFT
#undef BFLY2_TW_T

/*
 * Build-configuration helpers private to ki_bfly4_add() (undefined again
 * below).
 *
 * BFLY4A_TW_T / BFLY4A_TW_SHIFT follow the twiddle table in use: 16-bit
 * entries with a right shift of 15 by default, 32-bit entries with a right
 * shift of 22 when FFT_23BIT_TABLE is defined.
 *
 * BFLY4A_ROTATE(acc, re_out, im_out, re_in, im_in, tw) computes, through the
 * 64-bit accumulator acc,
 *     re_out = (re_in*tw[0] + im_in*tw[1]) >> BFLY4A_TW_SHIFT
 *     im_out = (im_in*tw[0] - re_in*tw[1]) >> BFLY4A_TW_SHIFT
 * The inputs are read twice, so the outputs must not alias them.
 */
#ifndef FFT_23BIT_TABLE
#define BFLY4A_TW_T	short
#define BFLY4A_TW_SHIFT	15
#else
#define BFLY4A_TW_T	int
#define BFLY4A_TW_SHIFT	22
#endif

#ifdef VC_PROJ
/* Portable build: plain 64-bit integer arithmetic. */
#define BFLY4A_ACC_T	long long
#define BFLY4A_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = ((long long)(re_in))*((tw)[0]); \
		acc = acc + (((long long)(im_in))*((tw)[1])); \
		(re_out) = (int)(acc>>BFLY4A_TW_SHIFT); \
		acc = ((long long)(im_in))*((tw)[0]); \
		acc = acc - (((long long)(re_in))*((tw)[1])); \
		(im_out) = (int)(acc>>BFLY4A_TW_SHIFT); \
	} while (0)
#else
/* Xtensa build: same sequence expressed with the HiFi2 intrinsics. */
#define BFLY4A_ACC_T	ae_int64
#define BFLY4A_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = AE_MUL32_HH((re_in), (tw)[0]); \
		AE_MULA32_HH(acc, (im_in), (tw)[1]); \
		(re_out) = (int)((long long)AE_SRAI64(acc, BFLY4A_TW_SHIFT)); \
		acc = AE_MUL32_HH((im_in), (tw)[0]); \
		AE_MULS32_HH(acc, (re_in), (tw)[1]); \
		(im_out) = (int)((long long)AE_SRAI64(acc, BFLY4A_TW_SHIFT)); \
	} while (0)
#endif

/*
 * Radix-4 butterfly pass, in place, over 27 columns starting at Fout + mm.
 *
 * Fout holds interleaved (re, im) ints; mm is an offset in ints.  Each column
 * combines the complex values at int offsets 0, 54, 108 and 162.  With A, B
 * and C the values at 54, 108 and 162 after rotation by the twiddle pairs at
 * table offsets 8*k, 16*k and 24*k, and x0 the value at offset 0:
 *     out[0]   = (x0 + B) + (A + C)
 *     out[108] = (x0 + B) - (A + C)
 *     out[54]  = (x0 - B) + j*(A - C)
 *     out[162] = (x0 - B) - j*(A - C)
 */
static void ki_bfly4_add(int * Fout, int mm)
{
	const BFLY4A_TW_T *tw_a, *tw_b, *tw_c;
	BFLY4A_ACC_T acc;
	int a_re, a_im, b_re, b_im, c_re, c_im;
	int ac_sum_re, ac_sum_im, ac_dif_re, ac_dif_im;
	int x0_plus_b_re, x0_plus_b_im, x0_minus_b_re, x0_minus_b_im;
	int remaining;

	Fout += mm;
	tw_a = tw_b = tw_c = fft_twiddles44100_864;

	for (remaining = 27; remaining != 0; remaining--) {
		BFLY4A_ROTATE(acc, a_re, a_im, Fout[54], Fout[55], tw_a);
		BFLY4A_ROTATE(acc, c_re, c_im, Fout[162], Fout[163], tw_c);
		BFLY4A_ROTATE(acc, b_re, b_im, Fout[108], Fout[109], tw_b);

		ac_sum_re = a_re + c_re;
		ac_dif_re = a_re - c_re;
		ac_sum_im = a_im + c_im;
		ac_dif_im = a_im - c_im;

		x0_minus_b_re = Fout[0] - b_re;
		x0_minus_b_im = Fout[1] - b_im;
		x0_plus_b_re = Fout[0] + b_re;
		x0_plus_b_im = Fout[1] + b_im;

		Fout[54] = x0_minus_b_re - ac_dif_im;
		Fout[55] = x0_minus_b_im + ac_dif_re;

		Fout[162] = x0_minus_b_re + ac_dif_im;
		Fout[163] = x0_minus_b_im - ac_dif_re;

		Fout[108] = x0_plus_b_re - ac_sum_re;
		Fout[109] = x0_plus_b_im - ac_sum_im;

		Fout[0] = x0_plus_b_re + ac_sum_re;
		Fout[1] = x0_plus_b_im + ac_sum_im;

		tw_a += 8;
		tw_b += 16;
		tw_c += 24;
		Fout += 2;
	}
}

#undef BFLY4A_ROTATE
#undef BFLY4A_ACC_T
#undef BFLY4A_TW_SHIFT
#undef BFLY4A_TW_T



/*
 * Build-configuration helpers private to ki_bfly4() (undefined again below).
 *
 * BFLY4_TW_T / BFLY4_TW_SHIFT follow the twiddle table in use: 16-bit entries
 * with a right shift of 15 by default, 32-bit entries with a right shift of
 * 22 when FFT_23BIT_TABLE is defined.
 *
 * BFLY4_ROTATE(acc, re_out, im_out, re_in, im_in, tw) computes, through the
 * 64-bit accumulator acc,
 *     re_out = (re_in*tw[0] + im_in*tw[1]) >> BFLY4_TW_SHIFT
 *     im_out = (im_in*tw[0] - re_in*tw[1]) >> BFLY4_TW_SHIFT
 * The inputs are read twice, so the outputs must not alias them.
 */
#ifndef FFT_23BIT_TABLE
#define BFLY4_TW_T	short
#define BFLY4_TW_SHIFT	15
#else
#define BFLY4_TW_T	int
#define BFLY4_TW_SHIFT	22
#endif

#ifdef VC_PROJ
/* Portable build: plain 64-bit integer arithmetic. */
#define BFLY4_ACC_T	long long
#define BFLY4_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = ((long long)(re_in))*((tw)[0]); \
		acc = acc + (((long long)(im_in))*((tw)[1])); \
		(re_out) = (int)(acc>>BFLY4_TW_SHIFT); \
		acc = ((long long)(im_in))*((tw)[0]); \
		acc = acc - (((long long)(re_in))*((tw)[1])); \
		(im_out) = (int)(acc>>BFLY4_TW_SHIFT); \
	} while (0)
#else
/* Xtensa build: same sequence expressed with the HiFi2 intrinsics. */
#define BFLY4_ACC_T	ae_int64
#define BFLY4_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = AE_MUL32_HH((re_in), (tw)[0]); \
		AE_MULA32_HH(acc, (im_in), (tw)[1]); \
		(re_out) = (int)((long long)AE_SRAI64(acc, BFLY4_TW_SHIFT)); \
		acc = AE_MUL32_HH((im_in), (tw)[0]); \
		AE_MULS32_HH(acc, (re_in), (tw)[1]); \
		(im_out) = (int)((long long)AE_SRAI64(acc, BFLY4_TW_SHIFT)); \
	} while (0)
#endif

/*
 * Radix-4 butterfly pass, in place, over m columns.
 *
 * Fout holds interleaved (re, im) ints.  Each column combines the complex
 * values at int offsets 0, 216, 432 and 648.  With A, B and C the values at
 * 216, 432 and 648 after rotation by the twiddle pairs at table offsets 2*k,
 * 4*k and 6*k, and x0 the value at offset 0:
 *     out[0]   = (x0 + B) + (A + C)
 *     out[432] = (x0 + B) - (A + C)
 *     out[216] = (x0 - B) + j*(A - C)
 *     out[648] = (x0 - B) - j*(A - C)
 * Nothing is done when m is 0.
 */
static void ki_bfly4(int * Fout, int m)
{
	const BFLY4_TW_T *tw_a, *tw_b, *tw_c;
	BFLY4_ACC_T acc;
	int a_re, a_im, b_re, b_im, c_re, c_im;
	int x0_plus_b_re, x0_plus_b_im, x0_minus_b_re, x0_minus_b_im;
	int ac_re, ac_im;

	tw_a = tw_b = tw_c = fft_twiddles44100_864;

	for (; m != 0; m--) {
		BFLY4_ROTATE(acc, b_re, b_im, Fout[432], Fout[433], tw_b);
		BFLY4_ROTATE(acc, a_re, a_im, Fout[216], Fout[217], tw_a);
		BFLY4_ROTATE(acc, c_re, c_im, Fout[648], Fout[649], tw_c);

		x0_minus_b_re = Fout[0] - b_re;
		x0_minus_b_im = Fout[1] - b_im;
		x0_plus_b_re = Fout[0] + b_re;
		x0_plus_b_im = Fout[1] + b_im;

		/* A + C feeds outputs 0 and 432 */
		ac_re = a_re + c_re;
		ac_im = a_im + c_im;

		Fout[432] = x0_plus_b_re - ac_re;
		Fout[433] = x0_plus_b_im - ac_im;

		Fout[0] = x0_plus_b_re + ac_re;
		Fout[1] = x0_plus_b_im + ac_im;

		/* A - C, turned by +/- 90 degrees, feeds outputs 216 and 648 */
		ac_re = a_re - c_re;
		ac_im = a_im - c_im;

		Fout[216] = x0_minus_b_re - ac_im;
		Fout[217] = x0_minus_b_im + ac_re;

		Fout[648] = x0_minus_b_re + ac_im;
		Fout[649] = x0_minus_b_im - ac_re;

		tw_a += 2;
		tw_b += 4;
		tw_c += 6;
		Fout += 2;
	}
}

#undef BFLY4_ROTATE
#undef BFLY4_ACC_T
#undef BFLY4_TW_SHIFT
#undef BFLY4_TW_T

/*
 * Build-configuration helpers private to ki_bfly3_3() (undefined again
 * below).
 *
 * BFLY3_3_TW_T / BFLY3_3_TW_SHIFT follow the twiddle table in use: 16-bit
 * entries with a right shift of 15 by default, 32-bit entries with a right
 * shift of 22 when FFT_23BIT_TABLE is defined.  BFLY3_3_SIN60 is the radix-3
 * scale factor in the matching scale (about 0.866 of full scale).
 *
 * BFLY3_3_ROTATE(acc, re_out, im_out, re_in, im_in, tw) computes, through the
 * 64-bit accumulator acc,
 *     re_out = (re_in*tw[0] + im_in*tw[1]) >> BFLY3_3_TW_SHIFT
 *     im_out = (im_in*tw[0] - re_in*tw[1]) >> BFLY3_3_TW_SHIFT
 * The inputs are read twice, so the outputs must not alias them.
 *
 * BFLY3_3_SCALE(acc, x, k) replaces x by x scaled with coefficient k: via
 * S_MUL in the default build, via a 64-bit product shifted right by 22 in the
 * FFT_23BIT_TABLE build.
 */
#ifndef FFT_23BIT_TABLE
#define BFLY3_3_TW_T	short
#define BFLY3_3_TW_SHIFT	15
#define BFLY3_3_SIN60	28378
#else
#define BFLY3_3_TW_T	int
#define BFLY3_3_TW_SHIFT	22
#define BFLY3_3_SIN60	3632372
#endif

#ifdef VC_PROJ
/* Portable build: plain 64-bit integer arithmetic. */
#define BFLY3_3_ACC_T	long long
#define BFLY3_3_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = ((long long)(re_in))*((tw)[0]); \
		acc = acc + (((long long)(im_in))*((tw)[1])); \
		(re_out) = (int)(acc>>BFLY3_3_TW_SHIFT); \
		acc = ((long long)(im_in))*((tw)[0]); \
		acc = acc - (((long long)(re_in))*((tw)[1])); \
		(im_out) = (int)(acc>>BFLY3_3_TW_SHIFT); \
	} while (0)
#else
/* Xtensa build: same sequence expressed with the HiFi2 intrinsics. */
#define BFLY3_3_ACC_T	ae_int64
#define BFLY3_3_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = AE_MUL32_HH((re_in), (tw)[0]); \
		AE_MULA32_HH(acc, (im_in), (tw)[1]); \
		(re_out) = (int)((long long)AE_SRAI64(acc, BFLY3_3_TW_SHIFT)); \
		acc = AE_MUL32_HH((im_in), (tw)[0]); \
		AE_MULS32_HH(acc, (re_in), (tw)[1]); \
		(im_out) = (int)((long long)AE_SRAI64(acc, BFLY3_3_TW_SHIFT)); \
	} while (0)
#endif

#ifndef FFT_23BIT_TABLE
#define BFLY3_3_SCALE(acc, x, k) \
	do { \
		(x) = S_MUL(x, k); \
	} while (0)
#elif defined(VC_PROJ)
#define BFLY3_3_SCALE(acc, x, k) \
	do { \
		acc = ((long long)(x))*(k); \
		(x) = (int)(acc>>22); \
	} while (0)
#else
#define BFLY3_3_SCALE(acc, x, k) \
	do { \
		acc = AE_MUL32_HH(x, k); \
		(x) = (int)((long long)AE_SRAI64(acc, 22)); \
	} while (0)
#endif

/*
 * Radix-3 butterfly pass, in place, over n groups of 9 columns.
 *
 * Fout holds interleaved (re, im) ints; one group spans 54 ints.  Each column
 * combines the complex values at int offsets 0, 18 and 36.  With A and B the
 * values at 18 and 36 after rotation by the twiddle pairs at table offsets
 * 32*k and 64*k (k restarts at 0 for every group), and x0 the value at 0:
 *     sum = A + B,  dif = (A - B) scaled by BFLY3_3_SIN60
 *     out[0]  = x0 + sum
 *     out[18] = (x0 - (sum >> 1)) + j*dif
 *     out[36] = (x0 - (sum >> 1)) - j*dif
 * Nothing is done when n is 0.
 */
static void ki_bfly3_3(int * Fout, int n)
{
	const BFLY3_3_TW_T *tw_a, *tw_b;
	BFLY3_3_ACC_T acc;
	int sin60 = BFLY3_3_SIN60;
	int a_re, a_im, b_re, b_im;
	int sum_re, sum_im, dif_re, dif_im;
	int mid_re, mid_im;
	int group, col;

	for (group = n; group != 0; group--) {
		tw_a = tw_b = fft_twiddles44100_864;

		for (col = 0; col < 9; col++) {
			BFLY3_3_ROTATE(acc, a_re, a_im, Fout[18], Fout[19], tw_a);
			BFLY3_3_ROTATE(acc, b_re, b_im, Fout[36], Fout[37], tw_b);

			dif_re = a_re - b_re;
			dif_im = a_im - b_im;
			BFLY3_3_SCALE(acc, dif_re, sin60);
			BFLY3_3_SCALE(acc, dif_im, sin60);

			sum_re = a_re + b_re;
			sum_im = a_im + b_im;

			/* x0 minus half the sum, taken before x0 is overwritten */
			mid_re = Fout[0] - (sum_re>>1);
			mid_im = Fout[1] - (sum_im>>1);

			Fout[0] = Fout[0] + sum_re;
			Fout[1] = Fout[1] + sum_im;

			Fout[36] = mid_re + dif_im;
			Fout[37] = mid_im - dif_re;
			Fout[18] = mid_re - dif_im;
			Fout[19] = mid_im + dif_re;

			tw_a += 32;
			tw_b += 64;
			Fout += 2;
		}

		/* skip the two upper thirds of this group */
		Fout += 36;
	}
}

#undef BFLY3_3_SCALE
#undef BFLY3_3_ROTATE
#undef BFLY3_3_ACC_T
#undef BFLY3_3_SIN60
#undef BFLY3_3_TW_SHIFT
#undef BFLY3_3_TW_T

/*
 * Build-configuration helpers private to ki_bfly3_2() (undefined again
 * below).
 *
 * BFLY3_2_TW_T / BFLY3_2_TW_SHIFT select 16-bit twiddles with a right shift
 * of 15 by default, or 32-bit twiddles with a right shift of 22 when
 * FFT_23BIT_TABLE is defined.  BFLY3_2_TW_INIT lists the three (first,
 * second) twiddle pairs used by this pass and BFLY3_2_SIN60 is the radix-3
 * scale factor, both in the matching scale.
 *
 * BFLY3_2_ROTATE(acc, re_out, im_out, re_in, im_in, tw) computes, through the
 * 64-bit accumulator acc,
 *     re_out = (re_in*tw[0] + im_in*tw[1]) >> BFLY3_2_TW_SHIFT
 *     im_out = (im_in*tw[0] - re_in*tw[1]) >> BFLY3_2_TW_SHIFT
 * The inputs are read twice, so the outputs must not alias them.
 *
 * BFLY3_2_SCALE(acc, x, k) replaces x by x scaled with coefficient k: via
 * S_MUL in the default build, via a 64-bit product shifted right by 22 in the
 * FFT_23BIT_TABLE build.
 *
 * BFLY3_2_RADIX3(acc, p, a_re, a_im, b_re, b_im, k) performs one radix-3
 * column at p: with x0 = p[0..1], sum = a + b and dif = (a - b) scaled by k,
 *     p[0..1]   = x0 + sum
 *     p[6..7]   = (x0 - (sum >> 1)) + j*dif
 *     p[12..13] = (x0 - (sum >> 1)) - j*dif
 * All four inputs are consumed before anything at p is written, so they may
 * be p[6], p[7], p[12], p[13] themselves.
 */
#ifndef FFT_23BIT_TABLE
#define BFLY3_2_TW_T	short
#define BFLY3_2_TW_SHIFT	15
#define BFLY3_2_TW_INIT	{25102,-21063,5690,-32270,-30792,-11207}
#define BFLY3_2_SIN60	28378
#else
#define BFLY3_2_TW_T	int
#define BFLY3_2_TW_SHIFT	22
#define BFLY3_2_TW_INIT	{3213021,-2696048,728333,-4130584,-3941358,-1434536}
#define BFLY3_2_SIN60	3632372
#endif

#ifdef VC_PROJ
/* Portable build: plain 64-bit integer arithmetic. */
#define BFLY3_2_ACC_T	long long
#define BFLY3_2_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = ((long long)(re_in))*((tw)[0]); \
		acc = acc + (((long long)(im_in))*((tw)[1])); \
		(re_out) = (int)(acc>>BFLY3_2_TW_SHIFT); \
		acc = ((long long)(im_in))*((tw)[0]); \
		acc = acc - (((long long)(re_in))*((tw)[1])); \
		(im_out) = (int)(acc>>BFLY3_2_TW_SHIFT); \
	} while (0)
#else
/* Xtensa build: same sequence expressed with the HiFi2 intrinsics. */
#define BFLY3_2_ACC_T	ae_int64
#define BFLY3_2_ROTATE(acc, re_out, im_out, re_in, im_in, tw) \
	do { \
		acc = AE_MUL32_HH((re_in), (tw)[0]); \
		AE_MULA32_HH(acc, (im_in), (tw)[1]); \
		(re_out) = (int)((long long)AE_SRAI64(acc, BFLY3_2_TW_SHIFT)); \
		acc = AE_MUL32_HH((im_in), (tw)[0]); \
		AE_MULS32_HH(acc, (re_in), (tw)[1]); \
		(im_out) = (int)((long long)AE_SRAI64(acc, BFLY3_2_TW_SHIFT)); \
	} while (0)
#endif

#ifndef FFT_23BIT_TABLE
#define BFLY3_2_SCALE(acc, x, k) \
	do { \
		(x) = S_MUL(x, k); \
	} while (0)
#elif defined(VC_PROJ)
#define BFLY3_2_SCALE(acc, x, k) \
	do { \
		acc = ((long long)(x))*(k); \
		(x) = (int)(acc>>22); \
	} while (0)
#else
#define BFLY3_2_SCALE(acc, x, k) \
	do { \
		acc = AE_MUL32_HH(x, k); \
		(x) = (int)((long long)AE_SRAI64(acc, 22)); \
	} while (0)
#endif

#define BFLY3_2_RADIX3(acc, p, a_re, a_im, b_re, b_im, k) \
	do { \
		int r3_sum_re = (a_re) + (b_re); \
		int r3_sum_im = (a_im) + (b_im); \
		int r3_dif_re = (a_re) - (b_re); \
		int r3_dif_im = (a_im) - (b_im); \
		int r3_mid_re = (p)[0] - (r3_sum_re>>1); \
		int r3_mid_im = (p)[1] - (r3_sum_im>>1); \
		BFLY3_2_SCALE(acc, r3_dif_re, k); \
		BFLY3_2_SCALE(acc, r3_dif_im, k); \
		(p)[0] = (p)[0] + r3_sum_re; \
		(p)[1] = (p)[1] + r3_sum_im; \
		(p)[12] = r3_mid_re + r3_dif_im; \
		(p)[13] = r3_mid_im - r3_dif_re; \
		(p)[6] = r3_mid_re - r3_dif_im; \
		(p)[7] = r3_mid_im + r3_dif_re; \
	} while (0)

/*
 * Radix-3 butterfly pass, in place, over N groups of 3 columns.
 *
 * Fout holds interleaved (re, im) ints; one group spans 18 ints and each
 * column combines the complex values at int offsets 0, 6 and 12.
 *   column 0: inputs used as they are (no rotation);
 *   column 1: inputs rotated by twiddle pairs 0 and 1 of the local table;
 *   column 2: inputs rotated by twiddle pairs 1 and 2 of the local table.
 * The group body always runs at least once; N is expected to be >= 1.
 */
static void ki_bfly3_2(int * Fout, int N)
{
	const BFLY3_2_TW_T rot[6] = BFLY3_2_TW_INIT;
	BFLY3_2_TW_T sin60 = BFLY3_2_SIN60;
	BFLY3_2_ACC_T acc;
	int a_re, a_im, b_re, b_im;
	int groups_left = N;

	do {
		/* column 0: unit twiddles */
		BFLY3_2_RADIX3(acc, Fout, Fout[6], Fout[7], Fout[12], Fout[13], sin60);
		Fout += 2;

		/* column 1: twiddle pairs 0 and 1 */
		BFLY3_2_ROTATE(acc, a_re, a_im, Fout[6], Fout[7], rot);
		BFLY3_2_ROTATE(acc, b_re, b_im, Fout[12], Fout[13], rot + 2);
		BFLY3_2_RADIX3(acc, Fout, a_re, a_im, b_re, b_im, sin60);
		Fout += 2;

		/* column 2: twiddle pairs 1 and 2 */
		BFLY3_2_ROTATE(acc, a_re, a_im, Fout[6], Fout[7], rot + 2);
		BFLY3_2_ROTATE(acc, b_re, b_im, Fout[12], Fout[13], rot + 4);
		BFLY3_2_RADIX3(acc, Fout, a_re, a_im, b_re, b_im, sin60);

		/* step over the last column plus the two upper thirds */
		Fout += 14;
	} while (--groups_left);
}

#undef BFLY3_2_RADIX3
#undef BFLY3_2_SCALE
#undef BFLY3_2_ROTATE
#undef BFLY3_2_ACC_T
#undef BFLY3_2_SIN60
#undef BFLY3_2_TW_INIT
#undef BFLY3_2_TW_SHIFT
#undef BFLY3_2_TW_T

/*
 * ki_bfly3_1 - in-place 3-point butterfly over N consecutive groups.
 *
 * Fout : buffer of ints, processed in groups of 6 (three adjacent pairs;
 *        the arithmetic treats each pair as one two-component value).
 * N    : number of groups. The loop is a do/while that pre-decrements its
 *        counter, so the body always runs at least once; callers must pass
 *        N >= 1.
 *
 * epi3 is ~0.866 (sqrt(3)/2): 3632372 / 2^22 in the FFT_23BIT_TABLE build,
 * 28378 in the default build (presumably Q15 -- S_MUL is defined elsewhere).
 */
static void ki_bfly3_1(int * Fout, int N)
{
#ifndef FFT_23BIT_TABLE
	short epi3 = 28378;
#else
	int epi3 = 3632372;
	long long tmp_val;
#endif
	int *p = Fout;
	int remaining = N;

	do {
		/* Snapshot all three input pairs before any slot is overwritten. */
		const int a0 = p[0], a1 = p[1];
		const int b0 = p[2], b1 = p[3];
		const int c0 = p[4], c1 = p[5];

		/* Sum of the 2nd and 3rd pair, and the 1st pair minus half of it. */
		const int sum0 = b0 + c0;
		const int sum1 = b1 + c1;
		const int mid0 = a0 - (sum0 >> 1);
		const int mid1 = a1 - (sum1 >> 1);

		/* Difference of the 2nd and 3rd pair, scaled by epi3 below. */
		int rot0 = b0 - c0;
		int rot1 = b1 - c1;

#ifndef FFT_23BIT_TABLE
		rot0 = S_MUL(rot0, epi3);
		rot1 = S_MUL(rot1, epi3);
#else
#ifdef VC_PROJ
		/* Portable path: 64-bit product, then drop the 22 fractional bits. */
		tmp_val = ((long long)rot0) * (epi3);
		rot0 = (int)(tmp_val >> 22);

		tmp_val = ((long long)rot1) * (epi3);
		rot1 = (int)(tmp_val >> 22);
#else
		/* HiFi2 path: same product and shift via the AE intrinsics. */
		tmp_val = AE_MUL32_HH(rot0, epi3);
		rot0 = (int)((long long)AE_SRAI64(tmp_val, 22));

		tmp_val = AE_MUL32_HH(rot1, epi3);
		rot1 = (int)((long long)AE_SRAI64(tmp_val, 22));
#endif
#endif

		/* Slot 0: plain sum of all three inputs. */
		p[0] = a0 + sum0;
		p[1] = a1 + sum1;

		/* Slots 2 and 1: the midpoint plus/minus the scaled difference,
		 * with the two components of the difference swapped. */
		p[4] = mid0 + rot1;
		p[5] = mid1 - rot0;
		p[2] = mid0 - rot1;
		p[3] = mid1 + rot0;

		p += 6;
	} while (--remaining);
}

/*
 * ssc_ifft - reorder fin into fout, then run the butterfly passes on fout.
 *
 * st   : transform state; this function reads st->bitrev, st->nfft and
 *        st->shift.
 * fin  : input ints. For each k in [0, nfft) the pairs starting at
 *        2*k, 2*k + 2*nfft and 2*k + 4*nfft are read, so 6*nfft ints are
 *        consumed in total.
 * fout : output/work buffer. Entry k of st->bitrev (read as short) is the
 *        int offset in fout where the six ints gathered for k are stored.
 *
 * After the gather, a non-zero st->shift selects the short pass sequence
 * (ki_bfly3_1/2/3 with 18/6/2, then ki_bfly2); otherwise the long sequence
 * (144/48/16, four ki_bfly4_add calls, then ki_bfly4) is run.
 */
void ssc_ifft(const kiss_fft_state *st,int *fin,int *fout)
{
	const short *rev = (short *)st->bitrev;
	const int *src = fin;
	const int third = (st->nfft) << 1;	/* ints between the gathered pairs */
	int remaining = st->nfft;

	while (remaining--)
	{
		int *dst = fout + (*rev++);

		dst[0] = src[0];
		dst[1] = src[1];
		dst[2] = src[third];
		dst[3] = src[third + 1];
		dst[4] = src[2 * third];
		dst[5] = src[2 * third + 1];

		src += 2;	/* advance to the next input pair */
	}

	if (!st->shift)
	{
		/* Long pass sequence. */
		ki_bfly3_1(fout, 144);
		ki_bfly3_2(fout, 48);
		ki_bfly3_3(fout, 16);
		ki_bfly4_add(fout, 0);
		ki_bfly4_add(fout, 216);
		ki_bfly4_add(fout, 432);
		ki_bfly4_add(fout, 648);
		ki_bfly4(fout, 108);
		return;
	}

	/* Short pass sequence. */
	ki_bfly3_1(fout, 18);
	ki_bfly3_2(fout, 6);
	ki_bfly3_3(fout, 2);
	ki_bfly2(fout);
}
