
#include "bands.h"
#include "ssc_modes.h"
#include "basic_op.h"
#include "math_op.h"
#include "ssc_pulsealloc.h"
#include "vector_quant.h"
#include "config.h"

//extern unsigned char *test_comp;
//extern int count;

/*
 * Per-band start offset into cache_bits[].
 * common_quant_partition() selects a band's cost table with
 * cache_bits + cache_index[band]; bands that share an offset (0, 41, 67 or 80)
 * share the same sub-table.
 */
static const short cache_index[16] = 
{
	0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   41,  41,  41, 67, 67, 80, 
};

/*
 * Four cost sub-tables stored back to back (one per row below), addressed
 * through cache_index[].
 * The first entry of each sub-table (40, 25, 12, 9) is the highest index of
 * that sub-table: bits2pulses() reads it as the upper bound of its search.
 * The following entries are the values compared against the bit budget;
 * common_quant_partition() charges cache[q]+1 for a non-zero index q.
 */
static const unsigned short cache_bits[90] = 
{
	40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94, 97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139, 142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 
	25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180, 185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 
	12, 39, 71, 99, 123, 144, 164, 182, 198, 214, 228, 241, 253, 
	9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 
};

/* State shared between the per-band quantisation helpers in this file. */
struct band_ctx
{
	const SpeechMode *m;      /* mode handed to quant_all_bands*(); stored but not read by the helpers shown here */
	short i;                  /* index of the band currently being processed */
	ec_ctx *ec;               /* entropy coder context forwarded to alg_quant()/alg_unquant() */
	short remaining_bits;     /* bit budget left for the band; reduced by common_quant_partition() */
	unsigned int seed;        /* state of speech_lcg_rand(), advanced by the decoder's band fill */
};

/*
 * Advance the linear congruential generator by one step.
 * The next state is 341*seed + 8141461, reduced to its low 23 bits.
 * The HW_HIFI3 build performs the multiplication through FFT_MUL_16_32().
 */
static unsigned int speech_lcg_rand(unsigned int seed)
{
   unsigned int next_state;

#ifdef HW_HIFI3
   next_state = FFT_MUL_16_32((int)seed,341) + 8141461;
#else
   next_state = 341*seed + 8141461;
#endif

   /* keep the low 23 bits only */
   return next_state & 0x7FFFFF;
}
#ifdef eBands_add_BX
	/*
	 * Number of samples in each of the 16 bands. When eBands_add_BX is
	 * defined this table replaces the (eBands[i+1]-eBands[i])<<2 computation
	 * in the band loops of this file.
	 * NOTE(review): the commented-out row below looks like the band edge
	 * table these widths were derived from (edge difference times 4) -
	 * confirm against the mode's eBands.
	 */
	short eBands_BX[] =
	{
		//0,1,2,3,4,5,6,7,8,9,10,12,14,16,20,24,30,
		4,4,4,4,4,4,4,4,4,4,8,8,8,16,16,24,
	};
#endif
/*
 * Compute one amplitude value per band.
 *
 * For every band i in [0, end) the routine finds the largest absolute sample,
 * derives a shift from it, accumulates the squares of the shifted samples and
 * stores EPSILON + speech_sqrt(sum), shifted back by the same amount, in
 * bandE[i]. A band whose largest absolute sample is 0 gets bandE[i] = EPSILON.
 *
 * m     - mode; only m->eBands (band edge table) is read
 * X     - input spectrum. The indexed variant (HW_ADDITIONAL_ETC undefined)
 *         reads X[eBands[i]<<2 .. (eBands[i+1]<<2)-1]; the other variants
 *         walk X sequentially from its start, one band width at a time
 * bandE - output array, one entry per band
 * end   - number of bands to process
 *
 * NOTE(review): the build switches are not independent. `j` is declared only
 * when HW_CODESIZE is undefined and `bandwidth` only when HW_ADDITIONAL_ETC
 * is defined, yet `j` is used whenever HW_ADDITIONAL_ETC is undefined and
 * `bandwidth` whenever HW_CODESIZE is defined. HW_CODESIZE without
 * HW_ADDITIONAL_ETC therefore cannot compile - confirm the supported
 * combinations.
 */
void compute_band_energies(const SpeechMode *m, const int *X, int *bandE, short end)
{
	short i;
	short M = 2;
	const short *eBands = m->eBands;

	for (i=0;i<end;i++)
	{
#ifndef HW_CODESIZE
		short j;
		int maxval=0;
#else
		/* assigned by speech_maxabs32() below before it is read */
		int maxval;
#endif

		int sum = 0;

#ifndef HW_ADDITIONAL_ETC
		/* Pass 1 (indexed variant): largest absolute value in the band. */
		j=eBands[i]<<M; 
		do{
			maxval = SPEECH_MAX(maxval, X[j]);
			maxval = SPEECH_MAX(maxval, -X[j]);
		}while(++j<(eBands[i+1]<<M));
#else
#ifdef		eBands_add_BX
		short bandwidth=eBands_BX[i];
#else

		short bandwidth = (eBands[i+1]-eBands[i])<<M;
#endif

#ifndef HW_CODESIZE
		/* Pass 1 (pointer variant): X is left pointing one past the band. */
		j = bandwidth;
		do 
		{	
			maxval = SPEECH_MAX(maxval, *X);
			maxval = SPEECH_MAX(maxval, -(*X));
			X++;
		} while (--j);
#else

		/* Pass 1 (helper variant): X is not advanced here. */
		maxval = speech_maxabs32(X, bandwidth);
#endif

#endif

		if (maxval > 0)
		{
			/* Shift applied to every sample before squaring; derived from the magnitude of maxval. */
#ifndef HW_CODESIZE
			short shift = EC_ILOG(maxval)-11;
#else
#ifndef VC_PROJ
			short shift = (20-AE_NSAZ32_L(maxval));
#else
			short shift = EC_ILOG(maxval)-11;
#endif
#endif

#ifndef HW_ADDITIONAL_ETC
			/* Pass 2 (indexed variant): sum of squares of the shifted samples. */
			j=eBands[i]<<M;
			do{
				sum += SSC_MULT16x16(EXTRACT16(SSC_VSHR(X[j],shift)),
									EXTRACT16(SSC_VSHR(X[j],shift)));
			}while(++j<eBands[i+1]<<M);
#else

#ifndef HW_CODESIZE
			/* Rewind to the start of the band: pass 1 advanced X by bandwidth. */
			X = X-bandwidth;
#endif
#ifdef compute_band_energies_dowhile_BX
//			do{
//				   sum += (int)(SSC_MULT16x16(EXTRACT16(SSC_VSHR(*X,shift)),EXTRACT16(SSC_VSHR(*X,shift))));
//				   X++;
//			   }while(--bandwidth);
//##################################################################

						/* Vector variant: two samples are squared and summed per iteration. */
						ae_valign    align1;
						ae_int32x2  yp01,tmp,tmp1,sum32x2;


						ae_int32x2  *yp_x=(ae_int32x2 *)X;
						align1 = AE_LA64_PP(yp_x);
						int sum1;
						/* Move X past the band now; the loop reads through yp_x. */
						X+=bandwidth;
//					    sum32x2=AE_MOVDA32X2(sum,0);

						/* bandwidth drops by 2 per pass; this relies on an even band width. */
						for(;bandwidth>0;bandwidth-=2)
						{
						  AE_LA32X2_IP(yp01, align1, yp_x);
						  tmp = AE_SRAA32(yp01, shift);
						  tmp1=AE_MULP32X2(tmp, tmp);
						  tmp1=AE_ADD32_HL_LH(tmp1,tmp1);
						  sum1 = AE_MOVAD32_L(tmp1);// read the value of the low lane
						  sum+=sum1;


//						  AE_LA32X2_IP(yp01, align1, yp_x);
//						  tmp = AE_SRAA32(yp01, shift);
//						  AE_MULAP32X2(sum32x2,tmp, tmp);
//						  tmp1=AE_ADD32_HL_LH(sum32x2,sum32x2);
//						  sum = AE_MOVAD32_L(tmp1);// read the value of the low lane
//						  sum32x2=AE_MOVDA32X2(sum,0)
						}

#else
			/* Pass 2 (pointer variant): X ends up one past the band again. */
			do{
#ifndef HW_HIFI3
				sum += SSC_MULT16x16(EXTRACT16(SSC_VSHR(*X,shift)),EXTRACT16(SSC_VSHR(*X,shift)));
#else
				sum += (int)(SSC_MULT16x16(EXTRACT16(SSC_VSHR(*X,shift)),EXTRACT16(SSC_VSHR(*X,shift))));
#endif
				X++;
			}while(--bandwidth);


#endif


#endif

			/* Undo the pre-scaling: a negative shift count reverses the earlier shift. */
			bandE[i] = EPSILON+SSC_VSHR(EXTEND32(speech_sqrt(sum)),-shift);
		}
		else
		{
			bandE[i] = EPSILON;
#ifdef HW_CODESIZE
			/* The helper variant of pass 1 did not advance X, so skip the band here. */
			X = X+bandwidth;
#endif
		}
	}
}

/*
 * Normalise each band such that the energy is one.
 *
 * For every band i in [0, end) a 16-bit gain g is derived from bandE[i]
 * (reciprocal of the band amplitude after it has been shifted into 16-bit
 * range) and each sample of freq is shifted, multiplied by g in Q15 and
 * written to X.
 *
 * m     - mode; only m->eBands (band edge table) is read
 * freq  - input spectrum (32-bit samples)
 * X     - output, normalised 16-bit samples
 * bandE - per-band amplitudes
 * end   - number of bands to process; the do/while always handles band 0,
 *         so end is expected to be at least 1
 *
 * The indexed variant (HW_ADDITIONAL_ETC undefined) addresses freq/X by the
 * absolute position eBands[i]<<2; the other variants walk both pointers
 * sequentially from their start.
 */
void normalise_bands(const SpeechMode *m, const int *freq, short *X, const int *bandE, short end)
{
	short i;
	const short *eBands = m->eBands;


	//int test_xx;
	i=0;
	
	do{
		short g, j, shift, E;
		/* Shift that brings bandE[i] into the range used for E below. */
#ifndef HW_CODESIZE
		shift = EC_ILOG(bandE[i])-14;
#else
#ifndef VC_PROJ
		shift = (17-AE_NSAZ32_L(bandE[i]));
#else
		shift = EC_ILOG(bandE[i])-14;
#endif
#endif
		E = SSC_VSHR(bandE[i], shift);


//		test = SHL(E,3);

		/* g: reciprocal of the scaled amplitude, via speech_rcp() or an integer division. */
#ifndef HW_REMOVE_RCP
		g = EXTRACT16(speech_rcp(SHL(E,3)));
#else
		/* NOTE(review): divides by E with no zero check - presumably bandE[i] is never 0; confirm. */
		g = (268435455/E);
//		test_xx = 32767/ ((E>>13)+1);
#endif


#ifndef HW_ADDITIONAL_ETC
		j=eBands[i]<<2; do {
			X[j] = SSC_MULT16x16_Q15(SSC_VSHR(freq[j],shift-1),g);
		} while (++j<eBands[i+1]<<2);
#else
		/* j = number of samples in the band. */
#ifdef	eBands_add_BX
		j=eBands_BX[i];

#else
		j=(eBands[i+1]-eBands[i])<<2;
#endif
#ifdef normalise_bands_dowhile_BX

		/* Vector variant: four samples per iteration. */
	    ae_valign   align1,align2;
		ae_int32x2  *yp_freq=(ae_int32x2 *)freq;
		ae_int32x2  yp01, yp23;
		ae_int32x2 g_32x2= AE_MOVDA32X2(g,g);
		align1 = AE_LA64_PP(yp_freq);
		align2 = AE_ZALIGN64();
		ae_int16x4 *yp_x=(ae_int16x4 *)X;
		ae_int16x4 tmp;

		/* Advance the scalar pointers past the band now; the loop uses yp_freq/yp_x. */
		X+=j;
		freq+=j;
		/* j drops by 4 per pass; this relies on the band width being a multiple of 4. */
		for (;j>0;j-=4)
		{
		   AE_LA32X2_IP(yp01, align1, yp_freq);
		   AE_LA32X2_IP(yp23, align1, yp_freq);
		   yp01=AE_SRAA32(yp01,shift-1);
		   yp23=AE_SRAA32(yp23,shift-1);
		   yp01=AE_MULP32X2(yp01,g_32x2);
		   yp23=AE_MULP32X2(yp23,g_32x2);
		   yp01=AE_SRAA32(yp01, 15);
		   yp23=AE_SRAA32(yp23, 15);
		   tmp = AE_SAT16X4(yp01,yp23);// saturate the two 32x2 vectors into one 16x4 vector
//		   tmp4=AE_CVT16X4(yp23, yp01);// alternative: truncate to the low 16 bits

		   AE_SA16X4_IP(tmp, align2, yp_x);
		}
		/* Flush what the aligning stores still hold in align2. */
		AE_SA64POS_FP(align2, yp_x);
#else
		//j=(eBands[i+1]-eBands[i])<<2;
		do 
		{
			*X++ = SSC_MULT16x16_Q15(SSC_VSHR(*freq++,shift-1),g);
		} while (--j);
#endif
#endif

	} while (++i<end);
}




/*
 * Translate a bit budget into an index of one band's cost table.
 *
 * cache - cost table of the band; cache[0] is the highest usable index and
 *         cache[1..cache[0]] are the costs
 * bits  - bit budget (one is subtracted before searching)
 *
 * A fixed number of bisection steps narrows the window [lo, hi]; of the two
 * remaining candidates the one whose cost is closer to the budget is
 * returned, the lower one winning ties. Index 0 counts as cost -1.
 */
static short bits2pulses(const unsigned short *cache ,short bits)
{
	short below = 0;         /* lower end of the search window */
	short above = cache[0];  /* upper end: last index of the table */
#ifdef HW_HIFI3
	short probe;
#else
	short rounds = LOG_MAX_PSEUDO;
#endif

	bits--;

#ifdef HW_HIFI3
	/* One bisection step; the HiFi3 build keeps the six steps unrolled. */
#define BITS2PULSES_HALVE()                          \
	do {                                             \
		probe = (below+above+1)>>1;                  \
		if (cache[probe] < bits) below = probe;      \
		else above = probe;                          \
	} while (0)

	BITS2PULSES_HALVE();
	BITS2PULSES_HALVE();
	BITS2PULSES_HALVE();
	BITS2PULSES_HALVE();
	BITS2PULSES_HALVE();
	BITS2PULSES_HALVE();

#undef BITS2PULSES_HALVE
#else
	do
	{
		short probe = (below+above+1)>>1;

		if (cache[probe] < bits)
		{
			below = probe;
		}
		else
		{
			above = probe;
		}
	} while (--rounds);
#endif

	{
		/* cost of the lower candidate; index 0 is treated as cost -1 */
		const int floor_cost = (below==0) ? -1 : cache[below];

		if (bits-floor_cost <= cache[above]-bits)
		{
			return below;
		}
	}
	return above;
}


/*
 * Choose the pulse index for the current band and charge its cost.
 *
 * b   - bit budget proposed for the band
 * ctx - band context; ctx->i selects the cost table and ctx->remaining_bits
 *       is reduced by the cost of the index finally chosen
 *
 * The index returned by bits2pulses() is lowered one step at a time while
 * its cost (cache[q]+1, or 0 for index 0) would make remaining_bits
 * negative. Returns the final index; 0 means no pulses are coded.
 */
short common_quant_partition(short b,struct band_ctx *ctx)
{
	const unsigned short *cache;
	short i,curr_bits,q;

	i = ctx->i;
	cache = cache_bits + cache_index[i];
	q = bits2pulses(cache, b);
	curr_bits = (q == 0) ? 0 : cache[q]+1;
	ctx->remaining_bits -= curr_bits;

	/* Over budget: refund the charge, step down one index and charge again. */
	while (ctx->remaining_bits < 0 && q > 0)
	{
		ctx->remaining_bits += curr_bits;
		q--;
		curr_bits = (q == 0) ? 0 : cache[q]+1;
		ctx->remaining_bits -= curr_bits;
	}

	return q;
}


/*
 * Encoder side: quantise one band.
 * The pulse index is obtained from common_quant_partition(); when it is
 * non-zero it is converted with num_pulses() and the band is coded by
 * alg_quant(). Nothing is coded for index 0.
 * With HW_EXPROTATION_LAST the extra exp_flag argument is forwarded to
 * alg_quant() unchanged.
 */
#ifndef HW_EXPROTATION_LAST
static void quant_partition(struct band_ctx *ctx, short *X, short N, short b)
#else
static void quant_partition(struct band_ctx *ctx, short *X, short N, short b, short exp_flag)
#endif
{
	ec_ctx *coder = ctx->ec;
	short pulse_index = common_quant_partition(b,ctx);
	short K;

	/* no pulses for this band: nothing to write */
	if (pulse_index == 0)
	{
		return;
	}

#ifdef HW_24BIT
	K = num_pulses(pulse_index,N);
#else
	K = num_pulses(pulse_index);
#endif

#ifdef HW_EXPROTATION_LAST
	alg_quant(X, N, K, coder, exp_flag);
#else
	alg_quant(X, N, K, coder);
#endif
}

/*
 * Encoder side: quantise bands 0 .. end-1 of X_.
 *
 * m          - mode; m->eBands gives the band edges
 * end        - number of bands
 * X_         - normalised spectrum, band i starts at eBands[i]<<2
 * pulses     - per-band bit allocation
 * total_bits - total budget, in the unit returned by ec_tell_frac()
 * balance    - running surplus/deficit carried from band to band
 * ec         - entropy coder context
 * codedBands - bands at or above this index get a budget of 0
 * seed       - random state, copied in and written back unless HW_CODESIZE
 *              is defined (parameter absent in the HW_CODESIZE +
 *              HW_EXPROTATION_LAST signature)
 * exp_flag   - only with HW_EXPROTATION_LAST: flag for band i (i >= 13) is
 *              exp_flag[i-13]; bands below 13 always use 1
 */
#ifndef HW_EXPROTATION_LAST
void quant_all_bands(const SpeechMode *m, short end, short *X_, short *pulses,
	short total_bits, short balance, ec_ctx *ec, short codedBands, unsigned int *seed)
#else

#ifndef HW_CODESIZE
void quant_all_bands(const SpeechMode *m, short end,
	short *X_, short *pulses,
	short total_bits, short balance, ec_ctx *ec, short codedBands, unsigned int *seed, short *exp_flag)
#else
void quant_all_bands(const SpeechMode *m, short end,
	short *X_, short *pulses,
	short total_bits, short balance, ec_ctx *ec, short codedBands, short *exp_flag)
#endif

#endif
{
	const short *band_edges = m->eBands;
	const short M = 2;
	struct band_ctx state;
	short band;

	state.ec = ec;
	state.m = m;
#ifndef HW_CODESIZE
	state.seed = *seed;
#endif

	for (band = 0; band < end; band++)
	{
		short *band_X = X_ + (band_edges[band]<<M);
		short width, used, left, budget;

		state.i = band;

#ifdef	eBands_add_BX
		width = eBands_BX[band];
#else
		width = (band_edges[band+1]-band_edges[band])<<M;
#endif

		/* bits consumed so far */
		used = ec_tell_frac(ec);

		/* Compute how many bits we want to allocate to this band */
		if (band != 0)
		{
			balance -= used;
		}
		left = total_bits-used-1;
		state.remaining_bits = left;

		budget = 0;
		if (band < codedBands)
		{
			/* spread the balance over at most the next three coded bands */
			short share = balance / SPEECH_MIN(3, codedBands-band);

			budget = SPEECH_MAX(0,SPEECH_MIN(left+1,pulses[band]+share));
		}

#ifdef HW_EXPROTATION_LAST
		quant_partition(&state, band_X, width, budget, (band<13) ? 1 : exp_flag[band-13]);
#else
		quant_partition(&state, band_X, width, budget);
#endif

		balance += pulses[band] + used;
	}

#ifndef HW_CODESIZE
	*seed = state.seed;
#endif
}









/*
 * Scale the normalised bands back by their (log-domain) band energy.
 *
 * For every band i in [0, end): lg = bandLogE[i] + (eMeans[i] << 6); the
 * integer part of lg (lg >> DB_SHIFT) becomes a shift, the fractional part
 * (lg & 1023) becomes a 16-bit gain g, and each output sample is the product
 * x * g shifted by that amount.
 *
 * m        - mode; m->eBands gives the band edges (band i spans
 *            eBands[i]<<2 .. eBands[i+1]<<2)
 * X        - normalised 16-bit input, read sequentially from its start
 * freq     - 32-bit output, written sequentially from its start
 * bandLogE - per-band log energy
 * end      - number of bands
 *
 * NOTE(review): the fractional mask is the literal 1023 while the integer
 * part uses DB_SHIFT - presumably DB_SHIFT is 10; confirm.
 */
void denormalise_bands(const SpeechMode *m, const short *X,
	int *freq, const short *bandLogE, short end )
{
	short i;
	const short *eBands = m->eBands;
	int *f;
	const short *x;
	f = freq;
	x = X;
	for (i=0;i<end;i++)
	{
		short j, band_end;
		short shift, g, lg;
		j = eBands[i]<<2;
		band_end = eBands[i+1]<<2;
		lg = SSC_ADD(bandLogE[i], SHL((short)eMeans[i],6));
		/* Handle the integer part of the log energy */
		shift = 16-(lg>>DB_SHIFT);
		if (shift>31)
		{
			/* Energy too small to represent: the band is written as zeros. */
			shift=0;
			g=0;
		} else {
			/* Gain from the fractional part of lg: library helper or inlined polynomial. */
#ifndef AFTER_190327_OPTI
			g = speech_exp2_frac(lg&(1023));
			g = g>>8;
#else
			g = (lg&(1023)) << 4;
			g = SSC_ADD(16383, SSC_MULT16x16_Q15(g, SSC_ADD(22804, SSC_MULT16x16_Q15(g, SSC_ADD(14819 , SSC_MULT16x16_Q15(10204,g))))));
			g = g>>8;
#endif
		}
		/* Handle extreme gains with negative shift. */



#ifndef HW_CODESIZE
		if (shift<0)
		{
			/* Left shift is capped at 2 bits; beyond that the gain is pinned to 32767. */
			if (shift < -2)
			{
				g = 32767;
				shift = -2;
			}
			do{
				*f++ = SHL(SSC_MULT16x16(*x++, g), -shift);
			}while (++j<band_end);
		} else{
			/* Be careful of the fixed-point "else" just above when changing this code */
			do{
				*f++ = SHR(SSC_MULT16x16(*x++, g), shift);
			}while (++j<band_end);
		}
#else
		/*
		 * NOTE(review): unlike the branch above, the HW_CODESIZE variants do
		 * not cap shift at -2 or pin g to 32767 - confirm this difference is
		 * intended.
		 */

#ifdef denormalise_bands_dowhile_BX
		/* Vector variant: four samples per iteration. */
		ae_int16x4 t_vec_1_16x4_v;
		ae_valign    align3;
		ae_int16x4* t_vec_1_16x4 = (ae_int16x4 *)(x);
		ae_int32x2* t_vec_2_32x2 = (ae_int32x2 *)(f);
		ae_valign align1 = AE_LA64_PP(t_vec_1_16x4);
		align3 = AE_ZALIGN64();
		ae_f16x4 gggg = AE_MOVDA16(g);
		ae_f32x2 tmp1,tmp2;

		for(;j<band_end;j+=4)
		{
			AE_LA16X4_IP(t_vec_1_16x4_v,align1,t_vec_1_16x4);
			AE_MUL16X4(tmp1,tmp2,t_vec_1_16x4_v, gggg);
			tmp1 = AE_SRAA32(tmp1, shift);
			tmp2 = AE_SRAA32(tmp2, shift);
			AE_SA32X2_IP(tmp1, align3, t_vec_2_32x2);
			AE_SA32X2_IP(tmp2, align3, t_vec_2_32x2);
			/* keep the scalar cursors in step with the vector pointers for the next band */
            f+=4;
            x+=4;
		}
		/*
		 * NOTE(review): normalise_bands() follows its aligning stores with
		 * AE_SA64POS_FP(); no such flush of align3 is issued here - confirm
		 * that f is always aligned so nothing is left pending.
		 */


#else
		do{
			*f++ = SSC_VSHR(SSC_MULT16x16(*x++, g), shift);

		}while (++j<band_end);
#endif

#endif






	}
}





/*
 * Decoder side: reconstruct one band.
 *
 * ctx      - band context (band index, entropy coder, bit budget, seed)
 * X        - output samples of the band
 * N        - number of samples in the band
 * b        - bit budget proposed for the band
 * lowband  - already decoded samples to fold from, or NULL
 * oldBandE - per-band values; only read with HW_EXPROTATION_LAST, where
 *            bands >= 13 use (oldBandE[i] < 7800) as the flag handed to
 *            alg_unquant() and lower bands always use 1
 *
 * When common_quant_partition() yields a non-zero index the band is decoded
 * with alg_unquant(). Otherwise the band is filled: with values derived from
 * the random generator when lowband is NULL, or with a copy of lowband
 * offset by +4/-4 on alternating samples; the fill is then passed to
 * renormalise_vector().
 */
static void quant_partition_dec(struct band_ctx *ctx, short *X,
	short N, short b, short *lowband, short *oldBandE )

{
	short  i,q;

	ec_ctx *ec;

#ifdef HW_EXPROTATION_LAST
	short exp_flag; // =  && oldBandE[15]<7800;
#endif

	ec = ctx->ec;
	i = ctx->i;
#ifdef HW_EXPROTATION_LAST
	if(i<13)
		exp_flag = 1;
	else
		exp_flag = (oldBandE[i]<7800);
#endif

	q = common_quant_partition(b,ctx);

	if (q!=0)
	{
#ifndef HW_24BIT
		short K = num_pulses(q);
#else
		short K = num_pulses(q,N);
#endif

#ifndef HW_EXPROTATION_LAST
		alg_unquant(X, N, K, ec);
#else
		alg_unquant(X, N, K, ec,exp_flag);
#endif
	}
	else 
	{
		/* If there's no pulse, fill the band anyway */
		int j;

		if (lowband == NULL)
		{
			/* Noise: one generator step per sample, top bits of the 23-bit state */
			for (j=0;j<N;j++)
			{
				ctx->seed = speech_lcg_rand(ctx->seed);
				X[j] = (short)((int)ctx->seed>>20);
			}
		}
		else
		{
			/* The generator is stepped once per band here; the value itself is not used for the fill. */
			ctx->seed = speech_lcg_rand(ctx->seed);

			for (j=0;j<N;j++)
			{
				/* +4 when (N-j) is odd, -4 when it is even */
				short tmp = ((N-j)&1)<<3;    //  (N-j) = r10
				tmp = tmp-4;
				X[j] = lowband[j]+tmp;
			}
		}

		renormalise_vector(X, N);
	}
}


/*
 * Decoder side: reconstruct bands 0 .. end-1 into X_.
 *
 * m          - mode; m->eBands gives the band edges
 * end        - number of bands
 * X_         - output spectrum, band i starts at eBands[i]<<2
 * _norm      - scratch buffer receiving a scaled copy of every band except
 *              the last; later bands fold from it
 * pulses     - per-band bit allocation
 * total_bits - total budget, in the unit returned by ec_tell_frac()
 * balance    - running surplus/deficit carried from band to band
 * ec         - entropy coder context
 * codedBands - bands at or above this index get a budget of 0
 * seed       - random state, copied in and written back on return
 * oldBandE   - forwarded to quant_partition_dec()
 *
 * The copy written to _norm is scaled by a Q15 factor chosen from the band
 * width (4, 8 or 16 samples). Any other width uses a factor of 0 so that the
 * factor is never read uninitialised.
 */
void quant_all_bands_dec(const SpeechMode *m, short end, short *X_, short *_norm,  short *pulses,
      short total_bits, short balance, ec_ctx *ec, short codedBands, unsigned int *seed, short *oldBandE)
{
	const short *eBands = m->eBands;
	short *norm;
	short i;
	short remaining_bits;
	short M;
	short lowband_offset;
	short update_lowband = 1;
	short *lowband_out;
	struct band_ctx ctx;
   
	M = 2;
	norm = _norm;
	lowband_offset = 0;
	ctx.ec = ec;
	ctx.m = m;


	ctx.seed = *seed;


	for (i=0;i<end;i++)
	{
		short tell;
#ifndef AFTER_190327_OPTI
		short b, N;
#else
		short b = 0, N;
#endif
		short curr_balance;
		short effective_lowband=-1;
		short last;
		short *X;  

		ctx.i = i;
		last=(i==end-1);
		X = X_+(eBands[i]<<M);
#ifdef	eBands_add_BX
		N =eBands_BX[i];
#else
		N = (eBands[i+1]-eBands[i])<<M;
#endif
		tell = ec_tell_frac(ec);

		/* Compute how many bits we want to allocate to this band */
		if (i != 0) balance -= tell;

		remaining_bits = total_bits-tell-1;
		ctx.remaining_bits = remaining_bits;
		if (i <= codedBands-1)
		{
			curr_balance = balance / SPEECH_MIN(3, codedBands-i);
			b = SPEECH_MAX(0, SPEECH_MIN(remaining_bits+1,pulses[i]+curr_balance));
		}

#ifndef AFTER_190327_OPTI
		else {
			b = 0;
		}
#endif
		/* Remember the most recent band that has at least N samples below it to fold from. */
		if ((eBands[i]<<M)-N >= 0 && (update_lowband || lowband_offset==0))
			lowband_offset = i;

		/* Get a conservative estimate of the collapse_mask's for the bands we're
			going to be folding from. */
		if (lowband_offset != 0)
		{     
			effective_lowband = SPEECH_MAX(0, (eBands[lowband_offset]<<M)-N);  
		}
		quant_partition_dec(&ctx, X, N, b, effective_lowband != -1 ? norm+effective_lowband : NULL, oldBandE	 );

		/* The last band is never folded from, so no copy is kept for it. */
		lowband_out =  last?NULL:norm+(eBands[i]<<M);
		if (lowband_out)
		{
			short j, n;
			/* Q15 scale factor selected by band width. */
			switch(N)
			{
			case 4:
				n=4097;
				break;
			case 8:
				n=5793;
				break;
			case 16:
				n=8194;
				break;
			default:
				/* Width without a table entry: use a defined factor instead of
				   reading n uninitialised (undefined behaviour). */
				n=0;
				break;
			}
#ifndef quant_all_bands_dec_loop_opt_hifi3_ZH
			for (j=N-1;j>=0;j--)
				lowband_out[j] = SSC_MULT16x16_Q15(n,X[j]);
#else
			{
				/* Vector variant: walks the band backwards, four samples per iteration. */
				/* NOTE(review): no flush follows the AE_SA16X4_RIP aligning stores -
				   confirm that lowband_out is always suitably aligned. */
				ae_int16x4 *pt1, *pt2, data, nv, tmps;
				ae_valign align1, align2;
				pt1 = (ae_int16x4*)(&X[N-1]);
				pt2 = (ae_int16x4*)(&lowband_out[N-1]);
				nv = AE_MOV16(n);
				align1 = AE_LA64_PP(pt1);
				align2 = AE_ZALIGN64();
				for (j=N-1;j>=0;j-=4)
				{
					AE_LA16X4_RIP(data, align1, pt1);
					tmps = AE_MULFP16X4S(nv, data);
					AE_SA16X4_RIP(tmps, align2, pt2);

				}
			}
#endif

		}
		balance += pulses[i] + tell;
		/* Only bands that received more than N<<BITRES bits move the folding source forward. */
		update_lowband = b>(N<<BITRES);
	}

	*seed = ctx.seed;

}


