//#ifdef HAVE_CONFIG_H
#include "config.h"
//#endif

#include "ssc_int32.h"
#include "ssc_mode.h"
#include "vector_quant.h"
#include "ssc_cwrs.h"

#include "ssc_math.h"
#include "ssc_pulsealloc.h"
#include "ssc_quant.h"

#ifdef FRAME_CNT_ON
extern int frame_cnt;
#endif
//extern int *X_pointer;			// 1728+144

extern short *SSC_restricted_local;

//short test_restricted_local[2000];



/*
 * Advance the pseudo-random noise generator by one step.
 *
 * The next state is bits 1..23 of (6502 * seed + 8141461); the returned
 * value therefore never exceeds 0x7FFFFF.
 */
unsigned int audio_lcg_rand(unsigned int seed)
{
	/* The product/sum is formed in unsigned int and then widened. */
	unsigned long long acc = 6502 * seed + 8141461;

	acc >>= 1;
	return (unsigned int)(acc & 0x7FFFFF);
}

/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
   with this approximation is important because it has an impact on the bit allocation.

   The callers in this file pass an angle in [0, 16384] and obtain the matching
   sine as bitexact_cos(16384 - angle).  The result is 1 plus a cubic polynomial
   in x2 = round(x*x / 2^13). */
static short bitexact_cos(short x)
{
   short x2;
   /* Squared input with rounding, scaled down by 13 bits. */
#ifdef VC_PROJ
   int tmp = (4096+((int)(x)*(x)))>>13;
#else
   int tmp = AE_MULA16S_scalar(4096, x, x) >> 13;
#endif

   audio_assert(tmp<=32767);
   x2 = (short)tmp;

   /* Nested (Horner-style) evaluation of the polynomial in x2. */
#ifdef VC_PROJ
   x2 = (short)((32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2))))));
#else
   x2 = (short)AE_MULA16S_scalar((32767-x2), FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, AE_MULA16S_scalar(8277, FRAC_MUL16(-626, x2), 1)))), 1);
#endif

   audio_assert(x2<=32766);
   return (short)(1+x2);
}


/*
 * Bit-exact fixed-point estimate of log2(isin/icos) (per the function name).
 *
 * Both inputs are first normalised so that their top set bit sits at bit 14
 * (shift by 15 - EC_ILOG).  The result is the difference of the two bit
 * counts scaled by 2^11 plus the difference of a quadratic correction term
 * evaluated on each normalised mantissa.
 */
static int bitexact_log2tan(int isin,int icos)
{
   int cos_bits = EC_ILOG(icos);
   int sin_bits = EC_ILOG(isin);

   icos = icos << (15-cos_bits);
   isin = isin << (15-sin_bits);

#ifdef HW_OPTI3
   /* Shift-based scaling of the integer part. */
   return ((sin_bits-cos_bits)<<11)
         +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
         -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
#elif defined(VC_PROJ)
   /* Plain C reference arithmetic. */
   return (sin_bits-cos_bits)*(1<<11)
         +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
         -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
#else
   /* Same polynomial using the multiply-accumulate intrinsic. */
   return (sin_bits-cos_bits)*(1<<11)
         +FRAC_MUL16(isin, AE_MULA16S_scalar(7932, FRAC_MUL16(isin, -2597), 1))
         -FRAC_MUL16(icos, AE_MULA16S_scalar(7932, FRAC_MUL16(icos, -2597), 1));
#endif
}

/*
 * Scale the normalised coefficients of each band by that band's gain and
 * write the result to freq, followed by 48 zero entries.
 *
 * m         mode; only eBands[] (band edges) is read
 * x         normalised input, consumed sequentially
 * freq      output, written sequentially
 * bandLogE  per-band log energy; eMeans[band] << 6 is added to it
 * effend    number of bands to process
 *
 * Each band spans (eBands[band+1] - eBands[band]) * 8 coefficients.  The low
 * 10 bits of the log gain go through fractional_ex2(); the remaining bits
 * select the shift applied after the multiply.
 */
void mono_band_denorm(const AUDIOMode *m, const short * x,
	int * freq, const short *bandLogE, int effend)
{
	const short *edges = m->eBands;
	int band;
	int pad;

	for (band = 0; band < effend; band++)
	{
		int count = (edges[band+1]-edges[band])<<3;
		short log_gain = (short)(bandLogE[band] + SSC_SHL16((short)eMeans[band],6));
		int shift = 22-(log_gain>>SHIFT_10);
		short mant;

		if (shift<32)
		{
			mant = fractional_ex2(log_gain&(1023));
		}
		else
		{
			/* Gain too small to represent: output zeros for this band. */
			shift = 0;
			mant = 0;
		}

		if (shift>-1)
		{
			do
			{
				*freq++ = (SSC_MULT16x16(*x++, mant)>>shift);
			} while (--count);
		}
		else
		{
			/* Left-shift case; the gain is capped at 32767 << 2. */
			if (shift < -2)
			{
				shift = -2;
				mant = 32767;
			}
			do
			{
				*freq++ = (SSC_MULT16x16(*x++, mant)<<(-shift));
			} while (--count);
		}
	}

	/* Clear the 48 entries that follow the last processed band. */
	for (pad = 0; pad < 48; pad++)
	{
		*freq++ = 0;
	}
}



/*
 * Anti-collapse noise fill.
 *
 * For every band below effEnd and for both channels (ch = 0, 1), each of the
 * 8 interleaved sub-blocks whose bit is clear in collapse_masks is filled
 * with +yu / -yu values chosen by the pseudo-random generator; a band that
 * received noise is then renormalised.
 *
 * mode              supplies eBands[] (band edges) and nbEBands (stride of the
 *                   per-channel energy arrays)
 * X_                coefficients; channel ch starts at X_ + ch*size
 * collapse_masks    one byte per (band, channel) at index (i<<1)+ch; bit k
 *                   set means sub-block k is left untouched
 * size              per-channel stride of X_
 * effEnd            number of bands to process
 * logE              current log energies, indexed ch*nbEBands + i
 * first_past_Elog,
 * second_past_Elog  log energies of two earlier frames (same indexing)
 * bits              per-band values used to derive the depth threshold
 * seed              generator state; passed by value, so the caller's copy
 *                   is not updated
 */
void coll_prevent(const AUDIOMode *mode, short *X_, unsigned char *collapse_masks, int size,
	short effEnd, short *logE, short *first_past_Elog,
	short *second_past_Elog, short *bits, unsigned int seed)
{
	int sh;
	int thresh_large;
	int N_zero;
	short thresh_small, first_sqrt;
	int dep;

   short i = 0, k;
   short ch, j;



   /* Loop over bands. */
   while(i<effEnd)
   {

	  int hwk;
	  int e_val;
	  short yu;
      /* Band width in units of 8 coefficients. */
      N_zero = mode->eBands[i+1]-mode->eBands[i];
	  ch = 0;
      /* dep in 1/8 bits */
	  dep = (1+bits[i])/((mode->eBands[i+1]-mode->eBands[i])<<3);

      /* Amplitude ceiling derived from the depth: half of second_exp(-dep<<7),
         then clamped to 32767 and halved again. */
      thresh_large = ((second_exp(-(dep<<7)))>>1);
	  thresh_small = (MIN32(32767,thresh_large))>>1;



	  /* Reciprocal square root of the band length (N_zero*8), with the
	     argument normalised by an even shift so that sh can undo it below. */
	  hwk = N_zero<<3;

	  sh = eclog_minus1(hwk)>>1;
      hwk = (hwk<<((7-sh)<<1));
      first_sqrt = audio_rsqrt_norm(hwk);

      /* Loop over the two channels. */
      do {
		 short *X;
		 short first_past;
         short second_past;

         int renormalize=0;

		 yu = 0;
         first_past = first_past_Elog[ch*mode->nbEBands+i];
         second_past = second_past_Elog[ch*mode->nbEBands+i];

         /* Energy rise relative to the quieter of the two earlier frames,
            floored at zero. */
         e_val = (int)(logE[ch*mode->nbEBands+i])-(int)(MIN16(first_past,second_past));
         e_val = MAX32(0, e_val);

         /* yu stays 0 when the rise is 16384 or more. */
         if (e_val < 16384)
         {
            yu = (MIN16(16383,(second_exp(-(short)(e_val))>>1))<<1);
         } 
       
		 /* Scale by 23170 in Q14, cap at thresh_small, halve, then apply the
		    1/sqrt(length) factor computed above. */
		 yu = SSC_MULT16x16_Q14(23170, MIN32(23169, yu));
		 yu = SSC_SHR16(MIN16(thresh_small, yu),1);
         yu = SSC_SHR32(SSC_MULT16x16_Q15(first_sqrt, yu),sh);
//#if 0
#ifdef VC_PROJ
		 X = X_+ch*size+(mode->eBands[i]<<3);
#else
		 X = X_ + (AE_MULA16S_scalar(mode->eBands[i]<<3, ch, size));
#endif
		 /* Sub-block k occupies indices (j<<3)+k, i.e. stride 8. */
		 for (k=0;k<8;k++)	
         {
            /* Detect collapse */
			if (!(collapse_masks[(i<<1)+ch]&1<<k))
            {
               /* Fill with noise */
#ifdef HW_FOR_BOXSLIM
			   /* One generator step per sub-block; signs come from its bits. */
			   seed = audio_lcg_rand(seed);
#endif

               for (j=0;j<N_zero;j++)
               {
#ifndef HW_FOR_BOXSLIM
                  /* One generator step per sample; bit 15 selects the sign. */
                  seed = audio_lcg_rand(seed);
				  X[(j<<3)+k] = (seed&0x8000 ? yu : -yu);
#else
				  short tmp_r;
				  /* ((seed >> n) & 2) - 1 is +1 or -1. */
				  tmp_r = ((seed>>(N_zero-j))&2)-1;
				  X[(j<<3)+k] = yu*tmp_r;
#endif
               }
               renormalize = 1;
            }
         }
         /* We just added some energy, so we need to renormalise */

		 if (renormalize)
			  renormalise_vector(X, N_zero<<3, 32767);
      } while (++ch<2);


	  i++;
   }
}

/*
 * Rebuild two channel vectors from a mid vector X and a side vector Y,
 * in place.
 *
 * X    in: mid vector; out: (mid*X - Y) scaled by lgain
 * Y    in: side vector; out: (mid*X + Y) scaled by rgain
 * mid  mid gain applied to X before combining
 * N    number of samples in each vector
 *
 * El and Er are the energies of the two outputs, computed as
 * (mid/2)^2 + <Y,Y> -/+ mid*2<X,Y>, all carried with 10 bits removed; the
 * low 10 bits of each term are tracked separately in the residu* variables
 * so that their carries are added back.  If either energy is below 157 the
 * function copies X into Y and returns without scaling.
 */
static void combine_two_ch(short *X, short *Y, short mid, int N)
{
	int i,j;
	int xp=0, side=0;
	int El, Er;
	short mid2;
	int kl, kr;
	int t, lgain, rgain;
	int xy01=0, xy02=0;    // accumulated with the rmac (MAC) unit
	int xy01_upper, xy01_under;
	long long use_rMACB = 0;
	short residu1, residu2, residu3, residu4;
	int use_rmac = 0;
#if 1	//hoon_test 1206
	/* xy01 = 2*<X,Y>, xy02 = <Y,Y> */
	for (i=0;i<N;i++)
	{
		xy01 = SSC_MAC16x16_tmp(xy01, Y[i], X[i]<<1);   // rMAC
		xy02 = SSC_MAC16x16_tmp(xy02, Y[i], Y[i]);     //rMACB
	}
#else
	ae_int16x4 *yptr_16x4 = (ae_int16x4 *)(Y);
	ae_int16x4 *xptr_16x4 = (ae_int16x4 *)(X);
	ae_int16x4 tmp_16x4;
	ae_int32x4 xy01_32x4 = 0;
    ae_int32x4 xy02_32x4 = 0;

	for (i=0;i < (N>>2);i++)
	{
		tmp_16x4 = (ae_int16x4)(AE_INT32X4_SLAI32( (ae_int32x4)(*xptr_16x4), 1));
		AE_MULA16X4_vector(xy01_32x4, *yptr_16x4, tmp_16x4);
		AE_MULA16X4_vector(xy02_32x4, *yptr_16x4, *yptr_16x4);
		xptr_16x4++;
		yptr_16x4++;
	}
	xy01 = AE_INT32X4_RADD(xy01_32x4);
	xy02 = AE_INT32X4_RADD(xy02_32x4);
#endif
	/* Split <Y,Y> into its low 10 bits and the rest. */
	residu2 = xy02 & 1023; 
	side = xy02>>10;   // last use of rMACB here; it is reused below for a multiplication

	/* Split xy01 into a high part (above bit 24) and a 24-bit low part so
	   that each can be multiplied by mid separately. */
	xy01_upper = xy01>>24;   //rMAC1 ==> xy01 is at most 32 bits (in practice it has never exceeded 30), so after shifting down 24 bits it is at most 8 bits
	xy01_under = xy01 & 16777215;  // rMAC0

#if 0
	xy01_upper = xy01_upper*mid;     // xy01_upper is at most 8 bits and mid at most 15 bits, so the product stays within 23 bits
	use_rMACB = (long long)xy01_under*mid;
#else
	xy01_upper = SSC_MULT16x16(xy01_upper, mid);     // xy01_upper is at most 8 bits and mid at most 15 bits, so the product stays within 23 bits
	use_rMACB = (long long)SSC_MULT16x32(xy01_under, mid);
#endif
	xy01_under = (int)(use_rMACB>>15);

	/* Recombine: low*mid>>15 plus high*mid<<9 (= <<24 >>15). */
	xy01 = xy01_under;
#ifndef HW_OPTI3	
	xy01 = xy01+xy01_upper*(1<<9);
#else
	xy01 = xy01+(xy01_upper<<9);
#endif

	residu1 = xy01 & 1023;   // xy01 lives in rmac; only the low 10 bits are needed, so rmac0 is used
	residu3 = residu2+residu1;
	residu4 = residu2-residu1;

	xp = xy01>>10;

	mid2 = SSC_SHR32(mid, 1);

	use_rmac = SSC_MULT16x16(mid2, mid2);
   residu1 = use_rmac & 1023; // uses rmac0

   /* Carries out of the discarded low 10 bits. */
   residu3 = (residu3+residu1)>>10;
   residu4 = (residu4+residu1)>>10;


   El = (use_rmac>>10) + side - xp;
   Er = (use_rmac>>10) + side + xp;

   El = El+residu4;
   Er = Er+residu3;

	/* Energies too small to normalise: duplicate X into Y and stop. */
	if (Er < 157 || El < 157)
	{
		for (j=0;j<N;j++)
			Y[j] = X[j];
		return;
	}

	kl = (EC_ILOG(El)+9)>>1;
	kr = (EC_ILOG(Er)+9)>>1;

	t = SSC_VSHR32(El, ((kl-7)<<1)-10 );   // El has already been shifted down by 10 bits

	lgain = audio_rsqrt_norm(t);
 
	t = SSC_VSHR32(Er, ((kr-7)<<1)-10);

	rgain = audio_rsqrt_norm(t);

	if (kl < 7)
		kl = 7;
	if (kr < 7)
		kr = 7;

	for (j=0;j<N;j++)
	{
		short r, l;
		/* Apply mid scaling (side is already scaled) */
		l = SSC_MULT16x16_Q15(mid, X[j]);
		r = Y[j];
		X[j] = SSC_toSHORT(SSC_PSHR32(SSC_MULT16x16(lgain, SSC_SUB16(l,r)), kl+1));
		Y[j] = SSC_toSHORT(SSC_PSHR32(SSC_MULT16x16(rgain, SSC_ADD16(l,r)), kr+1));
	}
}

/*
 * Row permutations used by the Hadamard (de)interleavers, stored back to
 * back for strides 2, 4, 8 and 16 (one source line each).  The permutation
 * for a given stride starts at ordery_table + stride - 2.
 */
static short ordery_table[] = {
       1,  0,
       3,  0,  2,  1,
       7,  0,  4,  3,  6,  1,  5,  2,
      15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
};

/*
 * De-interleave X in place: the sample at X[row*stride + col] moves to row
 * `col` of an N0-wide layout.  When `hadamard` is non-zero the destination
 * row is taken from ordery_table instead of being `col` itself.
 *
 * The reordered data is staged in SSC_restricted_local starting at index
 * 1583 and then copied back over X (N0*stride samples).
 */
static void inverse_interleave_hada(short *X, int N0, int stride, int hadamard)
{
   short *scratch = &(SSC_restricted_local[1583]);
   int total = SSC_MULT16x16(N0, stride);
   int col, row;
#ifdef HW_OPTI5
   short *src = X;
   short *dst = scratch;
#endif

   audio_assert(stride>0);

#ifndef HW_OPTI5
   if (hadamard)
   {
      const short *perm = ordery_table+stride-2;

      for (col=0;col<stride;col++)
      {
         for (row=0;row<N0;row++)
         {
            scratch[perm[col]*N0+row] = X[row*stride+col];
         }
      }
   }
   else
   {
      for (col=0;col<stride;col++)
      {
         for (row=0;row<N0;row++)
         {
            scratch[col*N0+row] = X[row*stride+col];
         }
      }
   }
#else
   /* Pointer-walking variant; the inner loops assume N0 >= 1. */
   if (hadamard)
   {
      short *perm = ordery_table+stride-2;

      for (col=0;col<stride;col++)
      {
         src = X+col;
         dst = scratch+(*(perm+col)*N0);

         row = N0;
         do
         {
            *dst++ = *src;
            src = src+stride;
         } while (--row);
      }
   }
   else
   {
      /* dst keeps advancing across columns, so rows are written in order. */
      for (col=0;col<stride;col++)
      {
         src = X+col;

         row = N0;
         do
         {
            *dst++ = *src;
            src = src+stride;
         } while (--row);
      }
   }
#endif

   /* Copy the staged result back over the input. */
   row = total;
   while (row--)
   {
      *X++ = *scratch++;
   }
}


/*
 * Interleave X in place: row `i` of an N0-wide layout is spread out so that
 * its sample j lands at X[j*stride + i].  When `hadamard` is non-zero the
 * source row is taken from ordery_table instead of being `i` itself.
 *
 * The reordered data is staged in SSC_restricted_local starting at index
 * 1583 and then copied back over X (N0*stride samples).
 *
 * Fix: the permutation pointer in the non-HW_OPTI5 path was declared
 * `const int *` although ordery_table holds shorts, so every lookup read two
 * table entries as one int.  It is now `const short *` in both paths.
 */
static void forward_interleave_hada(short *X, int N0, int stride, int hadamard)
{
   int i,j;
   short *tmp = &(SSC_restricted_local[1583]);
   int N;

#ifdef HW_OPTI5
   short *X_1 = X;
   short *tmp_1 = tmp;
   short *tmp_2 = tmp;
#endif

   N = SSC_MULT16x16(N0, stride);

   if (hadamard)
   {
      const short *ordery = ordery_table+stride-2;
#ifndef HW_OPTI5
      for (i=0;i<stride;i++)
         for (j=0;j<N0;j++)
            tmp[j*stride+i] = X[ordery[i]*N0+j];
#else
      /* Pointer-walking variant; the inner loop assumes N0 >= 1. */
      for (i=stride;i--;)
      {
         tmp_1 = tmp_2++;                 /* column i of the output */
         X_1 = X+((*ordery++)*N0);        /* permuted source row */

         j=N0;
         do
         {
            *tmp_1 = *X_1++;
            tmp_1 = tmp_1+stride;
         }while(--j);
      }
#endif
   } else {
#ifndef HW_OPTI5
      for (i=0;i<stride;i++)
         for (j=0;j<N0;j++)
            tmp[j*stride+i] = X[i*N0+j];
#else
      /* X_1 keeps advancing, so source rows are consumed in order. */
      for (i=stride;i--;)
      {
         tmp_1 = tmp_2++;

         j=N0;
         do
         {
            *tmp_1 = *X_1++;
            tmp_1 = tmp_1+stride;
         }while(--j);
      }
#endif
   }

   /* Copy the staged result back over the input. */
#ifndef HW_OPTI5
   for (j=0;j<N;j++)
      X[j] = tmp[j];
#else
   j=N;
   do
   {
      *X++=*tmp++;
   }
   while(--j);
#endif

}

/*
 * One level of an in-place Haar butterfly on X.
 *
 * X is treated as N0 rows of `stride` interleaved samples.  For every column
 * and every pair of adjacent rows (2p, 2p+1), both samples are scaled by
 * 23170 in Q15 and replaced by their sum and difference.
 */
void forward_haar(short *X, int N0, int stride)
{
   int col, pair;
   int pairs = N0 >> 1;

   for (col=0;col<stride;col++)
   {
#ifdef HW_OPTI5
      /* Pointer-walking variant; assumes at least one pair. */
      short *even = (X+col);
      short *odd = X+(stride+col);

      pair = pairs;
      do
      {
         short a = SSC_MULT16x16_Q15(23170, *even);
         short b = SSC_MULT16x16_Q15(23170, *odd);

         *even = a + b;
         *odd = a - b;

         even = even+(stride<<1);
         odd = odd+(stride<<1);
      } while(--pair);
#else
      /* Indexed variant; walks the pairs from last to first. */
      for (pair=pairs;pair--;)
      {
         short a = SSC_MULT16x16_Q15(23170, X[stride*(pair<<1)+col]);
         short b = SSC_MULT16x16_Q15(23170, X[stride*((pair<<1)+1)+col]);

         X[stride*2*pair+col] = a + b;
         X[stride*(2*pair+1)+col] = a - b;
      }
#endif
   }
}

/*
 * Choose the number of quantisation steps for the split angle.
 *
 * N          vector length of one half of the split
 * b          bits available (same 1/8-bit units as BITRES implies)
 * offset     bias added per degree of freedom
 * pulse_cap  cap subtracted from the budget
 *
 * Returns 1 when fewer than 4 units are available for the angle, otherwise
 * an even step count looked up from a 2^(k/8) table.
 */
static int compute_qn(int N, int b, int offset, int pulse_cap)
{
   /* 2^(k/8) for k = 0..7, scaled by 16384. */
   static const short exp2_table8[8] =
      {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
   int steps;
   int angle_bits;
   int budget_cap;
   int per_dof;
   /* 2*N - 1 */
#ifdef VC_PROJ
   int N2 = 2*N-1;
#else
   int N2 = AE_MULA16S_scalar(-1, 2, N);
#endif

   budget_cap = b-pulse_cap-(4<<BITRES);
   per_dof = (b+SSC_MULT16x16(N2, offset))/N2;

   angle_bits = IMIN(budget_cap, per_dof);
   angle_bits = IMIN(8<<BITRES, angle_bits);

   if (angle_bits<4)
   {
      return 1;
   }

   steps = exp2_table8[angle_bits&0x7]>>(14-(angle_bits>>BITRES));
   /* Round up to the next even value. */
   steps = ((steps+1)>>1)<<1;
   return steps;
}

/* Per-band decoding state shared by the quantisation helpers in this file. */
struct band_ctx {

   const AUDIOMode *m;     /* mode tables (logN[] is read by the theta helpers) */
   int i;                  /* current band index */
   int intensity;          /* bands with i >= intensity skip the stereo angle (qn forced to 1) */
   int spread;             /* forwarded unchanged to alg_unquant() */
   int tf_change;          /* >0: number of recombine steps; <0: extra time splits (see subband_quantization) */
   ec_ctx *ec;             /* entropy decoder state */
   int remaining_bits;     /* bit budget left; reduced as bits are consumed */

   unsigned int seed;      /* state of audio_lcg_rand() used for noise filling */
};



/*
 * Decode the split angle for a mono partition and derive the mid/side gains.
 *
 * ctx    band context (mode, band index and entropy decoder are read)
 * sctx   output, 5 shorts: [0]=imid, [1]=iside, [2]=delta, [3]=itheta,
 *        [4]=qalloc (bits consumed by the angle)
 * X, Y   not referenced by this function
 * N      length of each half
 * b      in/out: bit budget; reduced by the bits consumed here
 * B      number of blocks per half, used to mask *fill
 * B0     block count before the split; >1 selects the uniform pdf
 * LM     size shift; contributes LM<<3 to the pulse cap
 * fill   in/out: fill mask, trimmed when one half carries no energy
 */
static void compute_theta_mono(struct band_ctx *ctx, short *sctx,
	short *X, short *Y, int N, int *b, int B, int B0,
	int LM,int *fill)
{
   int qn;
   int itheta=0;
   int delta;
   int imid, iside;
   int qalloc;
   int pulse_cap;
   int offset;
   int tell;
   
#ifndef HW_OPTI5   
   int inv=0;
#endif

   const AUDIOMode *m;
   int i;
//   int intensity;
   ec_ctx *ec;

   m = ctx->m;
   i = ctx->i;
 //  intensity = ctx->intensity;
   ec = ctx->ec;

   /* Decide on the resolution to give to the split parameter theta */
   pulse_cap = m->logN[i]+(LM<<3);
   offset = (pulse_cap>>1) - QTHETA_OFFSET;

   qn = compute_qn(N, *b, offset, pulse_cap);

   /* Remember the decoder position so the cost of theta can be measured. */
   tell = ec_tell_frac(ec);
   if (qn!=1)
   {
      /* Entropy coding of the angle. We use a uniform pdf for the
         time split, a step for stereo, and a triangular one for the rest. */
      if (B0>1) {
         /* Uniform pdf */
            itheta = rc_decode_uint(ec, qn+1);
      } else {
         int fs=1, ft;
         /* Total frequency of the triangular pdf: ((qn/2)+1)^2 */
#if 0
         ft = ((qn>>1)+1)*((qn>>1)+1);
#else
         ft = SSC_MULT16x16((qn>>1)+1, (qn>>1)+1);
#endif
		 {
            /* Triangular pdf */
            int fl=0;
            int fm;
            fm = rc_dec(ec, ft);
            /* Rising half of the triangle if fm is below its cumulative total. */
#if 0
            if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
#else
            if (fm < SSC_MULT16x16(qn>>1, (qn>>1) + 1)>>1)
#endif
            {
               itheta = (isqrt32(8*(unsigned int)fm + 1) - 1)>>1;
               fs = itheta + 1;
               fl = itheta*(itheta + 1)>>1;
            }
            else
            {
               /* Falling half: mirror of the case above. */
               itheta = (2*(qn + 1) - isqrt32(8*(unsigned int)(ft - fm - 1) + 1))>>1;
               fs = qn + 1 - itheta;

#if 0
               fl = ft - ( (qn + 1 - itheta) * (qn + 2 - itheta) >> 1 );
#else
               fl = ft - (SSC_MULT16x16((qn + 1 - itheta), (qn + 2 - itheta)) >> 1 );
#endif
            }

            rc_decode_upd(ec, fl, fl+fs, ft);
         }
      }
      /* Rescale the index from 0..qn to 0..16384. */
      itheta = (int)itheta*16384/qn;
      /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
               Let's do that at higher complexity */
   } 

   qalloc = ec_tell_frac(ec) - tell;
   *b -= qalloc;

   if (itheta == 0)
   {
      /* All energy in the mid half: keep only the low B bits of the fill mask. */
      imid = 32767;
      iside = 0;
      *fill &= (1<<B)-1;
      delta = -16384;
   } else if (itheta == 16384)
   {
      /* All energy in the side half: keep only the upper B bits. */
      imid = 0;
      iside = 32767;
      *fill &= ((1<<B)-1)<<B;
      delta = 16384;
   } else {

      imid = bitexact_cos((short)itheta);
      iside = bitexact_cos((short)(16384-itheta));

      /* This is the mid vs side allocation that minimizes squared error
         in that band. */
      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
	  // #define FRAC_MUL16(a,b) ((16384+((int)(short)(a)*(short)(b)))>>15)
   }
   /* Results are narrowed to short when stored. */
   sctx[0] = imid;
   sctx[1] = iside;
   sctx[2] = delta;
   sctx[3] = itheta;
   sctx[4] = qalloc;
}

/*
 * Decode the stereo split angle (or the inversion flag) for a band and
 * derive the mid/side gains.
 *
 * ctx    band context (mode, band index, intensity, remaining_bits and the
 *        entropy decoder are read)
 * sctx   output, 6 shorts: [0]=inv, [1]=imid, [2]=iside, [3]=delta,
 *        [4]=itheta, [5]=qalloc (bits consumed here)
 * X, Y   not referenced by this function
 * N      band length
 * b      in/out: bit budget; reduced by the bits consumed here
 * B      number of blocks, used to mask *fill
 * fill   in/out: fill mask, trimmed when one half carries no energy
 */
static void compute_theta_stereo(struct band_ctx *ctx, short *sctx,
	short *X, short *Y, int N, int *b, int B, int *fill)
{
   int qn;
   int itheta=0;
   int delta;
   int imid, iside;
   int qalloc;
   int pulse_cap;
   int offset;
   int tell;
   int inv=0;
  
   const AUDIOMode *m;
   int i;
   int intensity;
   ec_ctx *ec;

   m = ctx->m;
   i = ctx->i;
   intensity = ctx->intensity;
   ec = ctx->ec;

   /* Same resolution rule as the mono case with a fixed 24 (= 3<<3) term. */
   pulse_cap = m->logN[i]+24;
   offset = (pulse_cap>>1) - QTHETA_OFFSET;

   qn = compute_qn(N, *b, offset, pulse_cap);
   
   /* At or above the intensity band no angle is coded. */
   if (i>=intensity)
      qn = 1;


   /* Remember the decoder position so the cost can be measured. */
   tell = ec_tell_frac(ec);
   if (qn!=1)
   {

      {
         /* Step pdf: weight p0 for indices up to x0 = qn/2, weight 1 above. */
         int p0 = 3;
         int x = itheta;
         int x0 = qn/2;
         int ft = p0*(x0+1) + x0;
         /* Use a probability of p0 up to itheta=8192 and then use 1 after */
		 {
            int fs;
            fs=rc_dec(ec,ft);

            if (fs < SSC_MULT16x16(x0+1, p0))

               x=fs/p0;
            else
               x = x0 + 1 + (fs - (x0+1) * p0);
            rc_decode_upd(ec, x<=x0?p0*x:(x-1-x0)+(x0+1)*p0, x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0, ft);
            itheta = x;
         }
      } 

      /* Rescale the index from 0..qn to 0..16384. */
      itheta = (int)itheta*16384/qn;


   } else {

      /* No angle: read the inversion flag only if more than 2<<BITRES bits
         remain both in this band's budget and overall. */
      if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
      {
            inv = rc_decode_logp_bit(ec, 2);
      } else
         inv = 0;
      itheta = 0;
   }
   qalloc = ec_tell_frac(ec) - tell;
   *b -= qalloc;

   if (itheta == 0)
   {
      /* All energy in mid: keep only the low B bits of the fill mask. */
      imid = 32767;
      iside = 0;
      *fill &= (1<<B)-1;
      delta = -16384;
   } else if (itheta == 16384)
   {
      /* All energy in side: keep only the upper B bits. */
      imid = 0;
      iside = 32767;
      *fill &= ((1<<B)-1)<<B;
      delta = 16384;
   } else {
      imid = bitexact_cos((short)itheta);
      iside = bitexact_cos((short)(16384-itheta));

      /* Mid vs side bit split derived from the angle. */
      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
   }
   /* Results are narrowed to short when stored. */
   sctx[0] = inv;
   sctx[1] = imid;
   sctx[2] = iside;
   sctx[3] = delta;
   sctx[4] = itheta;
   sctx[5] = qalloc;
}

/*
 * Start offsets into xche_bits[], laid out as 5 rows of 22 bands.
 * divide_quant() selects the entry at (M_extend + 1) * 22 + band, so the
 * rows correspond to M_extend = -1 .. 3.  Entries of -1 presumably mark
 * (row, band) pairs that have no sub-table -- TODO confirm.
 */
static const short cache_index50[110] = {
	-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,0,0,41,41,82,123,123,159,159,
	-1,-1,-1,-1,-1,-1,-1,-1,0,0,0,0,82,82,82,123,123,181,207,207,224,224,
	0,0,0,0,0,0,0,0,82,82,82,82,181,181,181,207,207,236,249,249,259,259,
	82,82,82,82,82,82,82,82,181,181,181,181,236,236,236,249,249,267,275,275,282,282,
	181,181,181,181,181,181,181,181,236,236,236,236,267,267,267,275,275,288,294,294,300,300,
};

/*
 * Concatenated bit-cost sub-tables addressed through cache_index50[].
 * Each sub-table starts with a count byte followed by that many entries
 * (the leading comment on each row is the sub-table's start offset), so
 * table[table[0]] is its last entry.  divide_quant() charges table[q] + 1
 * against the remaining bit budget for a non-zero index q.
 */
static const unsigned char xche_bits[305] = 
{
	/*0*/		40,15,23,28,31,34,36,38,39,41,42,43,44,45,46,47,47,49,50,51,52,53,54,55,55,57,58,59,60,61,62,63,63,65,66,67,68,69,70,71,71,
	/*41*/		40,20,33,41,48,53,57,61,64,66,69,71,73,75,76,78,80,82,85,87,89,91,92,94,96,98,101,103,105,107,108,110,112,114,117,119,121,123,124,126,128,
	/*82*/		40,23,39,51,60,67,73,79,83,87,91,94,97,100,102,105,107,111,115,118,121,124,126,129,131,135,139,142,145,148,150,153,155,159,163,166,169,172,174,177,179,
	/*123*/		35,28,49,65,78,89,99,107,114,120,126,132,136,141,145,149,153,159,165,171,176,180,185,189,192,199,205,211,216,220,225,229,232,239,245,251,
	/*159*/		21,33,58,79,97,112,125,137,148,157,166,174,182,189,195,201,207,217,227,235,243,251,
	/*181*/		25,31,55,75,91,105,117,128,138,146,154,161,168,174,180,185,190,200,208,215,222,229,235,240,245,255,
	/*207*/		16,36,65,89,110,128,144,159,173,185,196,207,217,226,234,242,250,
	/*224*/		11,41,74,103,128,151,172,191,209,225,241,255,
	/*236*/		12,39,71,99,123,144,164,182,198,214,228,241,253,
	/*249*/		9,44,81,113,142,168,192,214,235,255,
	/*259*/		7,49,90,127,160,191,220,247,
	/*267*/		7,47,87,123,155,184,212,237,
	/*275*/		6,52,97,137,174,208,240,
	/*282*/		5,57,106,151,192,231,
	/*288*/		5,55,103,147,187,224,
	/*294*/		5,60,113,161,206,248,
	/*300*/		4,65,122,175,224,
};

/*
 * Decode one partition of a band, splitting it recursively in two while
 * the bit budget is large enough.
 *
 * ssc_matrix  band context (band index, spread, entropy decoder,
 *             remaining_bits and seed are used)
 * X           output vector of length t_N
 * t_N         partition length
 * ww          bits available for this partition
 * B           number of blocks in the partition
 * lowband     optional folding source (may be NULL)
 * M_extend    remaining split depth; -1 forbids further splitting
 * gain        gain applied to the decoded/filled vector
 * full_fill   mask of blocks that may be filled when no pulses are coded
 *
 * Returns the mask of blocks that received energy.
 */
static unsigned divide_quant(struct band_ctx *ssc_matrix, short *X,
	int t_N, int ww, int B, short *lowband,
	int M_extend,
	short gain, int full_fill)
{
   const unsigned char *xche;
   int rp;
   int curr_bits;

   int B_zero=B;
   short mid=0, side=0;
   unsigned central_mode=0;
   short *Y=NULL;
  
   int srd;
   ec_ctx *coding;
   int _sh; 
   int i = ssc_matrix->i;

   /* Row (M_extend+1) of the 22-band index table. */
   _sh = (M_extend+1)*22+i;

   srd = ssc_matrix->spread;
   coding = ssc_matrix->ec;


   /* NOTE(review): cache_index50[] contains -1 entries, in which case xche
      points one byte before xche_bits; the code appears to rely on such
      entries never being dereferenced -- confirm against the callers. */
   xche = xche_bits+(*(cache_index50+_sh));


   /* Split when the budget exceeds the largest tabulated cost by more than 12. */
   if (t_N>2 && M_extend != -1 && ww > *(xche+*xche)+12 )
   {

	   short short_ctx[5];
	   short *next_lowband2=NULL;
	   int balancing;
      int middle_bits, side_bits, de;
      int simple_itheta;
      int alloc;
      
	  
      /* Halve the vector: X keeps the first half, Y is the second. */
      t_N >>= 1;
      Y = X+t_N;
      M_extend -= 1;
      if (B==1)
         full_fill = (full_fill&1)|(full_fill<<1);
      B = (B+1)>>1;

	  compute_theta_mono(ssc_matrix, short_ctx, X, Y, t_N, &ww, B, B_zero,M_extend, &full_fill);


      de = short_ctx[2];
      simple_itheta = short_ctx[3];
      alloc = short_ctx[4];

      mid = short_ctx[0];
      side = short_ctx[1];

      /* For a time split with an intermediate angle, bias the allocation. */
      if (B_zero>1 && (simple_itheta&16383))
      {
         if (simple_itheta > 8192)
            de -= de>>(4-M_extend);
         else     
            de = IMIN(0, de + (t_N<<BITRES>>(5-M_extend)));
      }
      middle_bits = IMAX(0, IMIN(ww, ((ww-de)>>1)));
      side_bits = ww-middle_bits;
      ssc_matrix->remaining_bits -= alloc;

      if (lowband)
         next_lowband2 = lowband+t_N; 

      /* Snapshot used to measure how many bits the first half really used. */
      balancing = ssc_matrix->remaining_bits;


	   /* Decode the half with the larger budget first; bits it leaves
	      unused beyond 24 are handed to the other half. */
	   if (middle_bits < side_bits)
	   {
		   central_mode = divide_quant(ssc_matrix, Y, t_N, side_bits, B,
			   next_lowband2, M_extend,
			   SSC_MULT16x16_P15(gain,side), full_fill>>B)<<(B_zero>>1);
		   balancing = side_bits - (balancing-ssc_matrix->remaining_bits);
		   if (balancing > 24 && simple_itheta!=16384)
			   middle_bits += balancing - 24;

		   central_mode |= divide_quant(ssc_matrix, X, t_N, middle_bits, B,
			   lowband, M_extend,
			   SSC_MULT16x16_P15(gain,mid), full_fill);
	   }
	   else
	   {
		   central_mode = divide_quant(ssc_matrix, X, t_N, middle_bits, B,
			   lowband, M_extend,
			   SSC_MULT16x16_P15(gain,mid), full_fill);

		   balancing = middle_bits - (balancing-ssc_matrix->remaining_bits);
		   if (balancing > 24 && simple_itheta!=0)
			   side_bits += balancing - 24;
		   central_mode |= divide_quant(ssc_matrix, Y, t_N, side_bits, B,
			   next_lowband2, M_extend,
			   SSC_MULT16x16_P15(gain,side), full_fill>>B)<<(B_zero>>1);
	   }


   } else {

	   /* Leaf: convert the budget into a table index and charge its cost. */
	   rp = pulse_coding(ww, xche);
	   curr_bits = (rp == 0) ? 0 : xche[rp]+1;
	 
      ssc_matrix->remaining_bits -= curr_bits;


      /* Back off one index at a time while the overall budget is overdrawn. */
      while (rp > 0 && ssc_matrix->remaining_bits < 0)
      {
         ssc_matrix->remaining_bits += curr_bits;
         rp--;
		 curr_bits = (rp == 0) ? 0 : xche[rp]+1;
         ssc_matrix->remaining_bits -= curr_bits;
      }

      if (rp!=0)
      {
         /* Pulses were coded: decode the vector. */
         central_mode = alg_unquant(X, t_N, num_pulses(rp), srd, B, coding, gain);
      } else {

			/* No pulses: output silence, noise, or a folded copy of lowband. */
			int j;   
            unsigned central_mask;
   
            central_mask = (unsigned)(1<<B)-1;
            full_fill &= central_mask;
            if (!full_fill)
            {
			   j = 0;
			   while(j<t_N)
			   {
				   X[j] = 0;
				   j++;
			   }


            } else {


               if (lowband == NULL)
               {
                  /* Noise */
                  for (j=0;j<t_N;j++)
                  {
                     ssc_matrix->seed = audio_lcg_rand(ssc_matrix->seed);
#ifndef HW_FOR_BOXSLIM
                     X[j] = (short)((int)ssc_matrix->seed>>20);
#else
					 X[j] = (short)((int)ssc_matrix->seed>>16);
#endif
                  }

                  central_mode = central_mask;
               } else {

#ifdef HW_FOR_BOXSLIM
				  ssc_matrix->seed = audio_lcg_rand(ssc_matrix->seed);
#endif

                  /* Folded spectrum plus a +/-4 dither. */
                  for (j=0;j<t_N;j++)
                  {
#ifndef HW_FOR_BOXSLIM
					 short tmp;
                     ssc_matrix->seed = audio_lcg_rand(ssc_matrix->seed);

					 /* Bit 15 of the seed selects +4 or -4. */
					 tmp = ((ssc_matrix->seed)&0x8000)>>12;
					 tmp = tmp-4;
#else
					 /* Dither sign alternates with the parity of (t_N - j). */
					 short tmp = ((t_N-j)&1)<<3;    //  (t_N-j) = r10
					 tmp = tmp-4;
#endif
					 X[j] = lowband[j]+tmp;
                  }
                  central_mode = full_fill;
               }

               renormalise_vector(X, t_N, gain);
            }
 
      }
   }
   return central_mode;
}


/*
 * Decode one band of one channel, including the time/frequency
 * re-arrangement applied around the partition decoder.
 *
 * ssc_matrix            band context (tf_change is read here)
 * X                     output vector of length N
 * N                     band length
 * b                     bits available for the band
 * B                     number of blocks
 * smallnum_band         optional folding source (may be NULL)
 * lowband_out           optional output: X scaled for use as a later
 *                       folding source (may be NULL)
 * gain                  gain applied to the decoded vector
 * smallnumband_scratch  optional scratch; when given, smallnum_band is
 *                       copied there before it is transformed in place
 * full_fill             mask of blocks that may be filled
 *
 * Returns the mask of blocks that received energy.
 *
 * Fix: the lowband_out scale factor was left uninitialised when N lies
 * outside the tabulated band sizes; it now falls back to 0 so that
 * lowband_out is written with zeros instead of indeterminate values.
 */
static unsigned subband_quantization(struct band_ctx *ssc_matrix, short *X,
	int N, int b, int B, short *smallnum_band, short *lowband_out,
	short gain, short *smallnumband_scratch, int full_fill)
{
   /* Fill-mask remapping used when blocks are recombined. */
   static const unsigned char interleaving_tbl[16]={
	   0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3};

   /* Expands each mask bit into two bits when recombination is undone. */
   static const unsigned char inverse_interleaving_tbl[16]={
	   0,3,12,15,
	   48,51,60,63,
	   192,195,204,207,
	   240,243,252,255
	   };

   int N0=N;
   int ori_N_B=N;
   int zero_N_B;
   int second_div=0;
   int merge_again=0;
   int zero_B=B;
   int longBlocks;
   unsigned central_mode=0;
   int k;
   int time_freq_ch;

   time_freq_ch = ssc_matrix->tf_change;

   longBlocks = zero_B==1;

   ori_N_B /= B;

   if (time_freq_ch>0)
      merge_again = time_freq_ch;

   /* Work on a private copy of the folding source if it will be modified. */
   if (smallnumband_scratch && smallnum_band && (merge_again || (time_freq_ch<0) || zero_B>1))
   {
      int tx;

      for (tx=0;tx<N;tx++)
         smallnumband_scratch[tx] = smallnum_band[tx];
      smallnum_band = smallnumband_scratch;
   }

   /* Band recombining to increase frequency resolution. */
   for (k=0;k<merge_again;k++)
   {
      if (smallnum_band)
         forward_haar(smallnum_band, N>>k, 1<<k);
      full_fill = interleaving_tbl[full_fill&0xF]|interleaving_tbl[full_fill>>4]<<2;
   }
   B>>=merge_again;
   ori_N_B<<=merge_again;

   /* Increasing the time resolution. */
   while ((ori_N_B&1) == 0 && time_freq_ch<0)
   {
      if (smallnum_band)
         forward_haar(smallnum_band, ori_N_B, B);
      full_fill |= full_fill<<B;
      B <<= 1;
      ori_N_B >>= 1;
      second_div++;
      time_freq_ch++;
   }

   zero_B=B;
   zero_N_B = ori_N_B;

   /* Reorganise the folding source so that each block is contiguous. */
   if (zero_B>1)
   {
      if (smallnum_band)
         inverse_interleave_hada(smallnum_band, ori_N_B>>merge_again, zero_B<<merge_again, longBlocks);
   }

   central_mode = divide_quant(ssc_matrix, X, N, b, B, smallnum_band, 3, gain, full_fill);

   /* Undo the sample reorganisation on the decoded vector. */
   if (zero_B>1)
      forward_interleave_hada(X, ori_N_B>>merge_again, zero_B<<merge_again, longBlocks);

   /* Undo the time-resolution changes. */
   ori_N_B = zero_N_B;
   B = zero_B;

   for (k=0;k<second_div;k++)
   {
      B >>= 1;
      ori_N_B <<= 1;
      central_mode |= central_mode>>B;
      forward_haar(X, ori_N_B, B);
   }

   /* Undo the recombining. */
   for (k=0;k<merge_again;k++)
   {
      central_mode = inverse_interleaving_tbl[central_mode];
      forward_haar(X, N0>>k, 1<<k);
   }
   B<<=merge_again;

   /* Scale the output for later folding. */
   if (lowband_out)
   {
      int j;
      short n;

      /* Per-size scale factor; the values track sqrt(N0) * 2048. */
      switch(N0)
      {
      case 8:
         n = 5793;
         break;
      case 16:
         n = 8194;
         break;
      case 32:
         n = 11587;
         break;
      case 48:
         n = 14186;
         break;
      case 64:
         n = 16388;
         break;
      case 96:
         n = 20067;
         break;
      case 144:
         n = 24576;
         break;
      default:
         /* Untabulated band size: emit a silent folding source rather
            than scaling by an indeterminate value. */
         n = 0;
         break;
      }

      for (j=0;j<N0;j++)
         lowband_out[j] = SSC_MULT16x16_Q15(n,X[j]);
   }
   central_mode &= (1<<B)-1;

   return central_mode;
}


/*
 * Decode one band of a stereo pair: read the mid/side angle, decode the mid
 * vector into X and the side vector into Y, then merge them back into the
 * two channels.
 *
 * ssc_matrix         band context (remaining_bits is updated)
 * X, Y               output vectors of length N
 * N                  band length
 * orig_b             bits available for the band
 * B                  number of blocks
 * smallnum_band      optional folding source for the mid vector
 * smallnum_band_out  optional folding output taken from the mid vector
 * lowband_scratch    scratch handed to the mid decode
 * fill               fill mask
 *
 * Returns the combined mask of blocks that received energy.
 */
static unsigned two_channel_quant_band(struct band_ctx *ssc_matrix, short *X, short *Y,
	int N, int orig_b, int B, short *smallnum_band,
	short *smallnum_band_out,
	short *lowband_scratch, int fill)
{
	/* Six-entry result area for compute_theta_stereo (indices 1434..1439). */
	short *theta = &SSC_restricted_local[1434];
	unsigned cm = 0;
	int invert;
	int delta;
	int itheta;
	int qalloc;
	int mbits, sbits;
	int rebalance;
	short mid_gain, side_gain;

	compute_theta_stereo(ssc_matrix, theta, X, Y, N, &orig_b, B, &fill);

	/* Read everything out before any further call can reuse the scratch. */
	invert = (int)theta[0];
	mid_gain = theta[1];
	side_gain = theta[2];
	delta = (int)theta[3];
	itheta = (int)theta[4];
	qalloc = (int)theta[5];

	mbits = IMAX(0, IMIN(orig_b, (orig_b-delta)>>1));
	sbits = orig_b-mbits;
	ssc_matrix->remaining_bits -= qalloc;

	/* Snapshot used to measure what the first decode actually consumed. */
	rebalance = ssc_matrix->remaining_bits;

	if (mbits >= sbits)
	{
		/* Mid first; bits it leaves unused beyond 24 go to the side. */
		cm = subband_quantization(ssc_matrix, X, N, mbits, B,
			smallnum_band, smallnum_band_out,
			32767, lowband_scratch, fill);

		rebalance = mbits - (rebalance-ssc_matrix->remaining_bits);
		if (rebalance > 24 && itheta!=0)
			sbits += rebalance - (24);

		cm |= subband_quantization(ssc_matrix, Y, N, sbits, B,
			NULL, NULL,
			side_gain, NULL, fill>>B);
	}
	else
	{
		/* Side first; bits it leaves unused beyond 24 go to the mid. */
		cm = subband_quantization(ssc_matrix, Y, N, sbits, B,
			NULL, NULL,
			side_gain, NULL, fill>>B);

		rebalance = sbits - (rebalance-ssc_matrix->remaining_bits);
		if (rebalance > 24 && itheta!=16384)
			mbits += rebalance - (24);

		cm |= subband_quantization(ssc_matrix, X, N, mbits, B,
			smallnum_band, smallnum_band_out,
			32767, lowband_scratch, fill);
	}

	if (N!=2)
		combine_two_ch(X, Y, mid_gain, N);

	/* Apply the decoded inversion flag to the second channel. */
	if (invert)
	{
		short z;

		for (z = 0; z<N; z++)
		{
			Y[z] = -Y[z];
		}
	}

	return cm;
}


//short test1[1728];

/*
 * Run the band quantizer over bands 0 .. end-1.
 *
 * For every band the routine works out a bit budget from the running
 * balance, chooses the already-processed region that may serve as a source
 * for the current band, gathers the masks of the bands covering that
 * region, and then calls either two_channel_quant_band() (joint coding) or
 * subband_quantization() once per channel (while twoch_dual is set).
 *
 *   mode            supplies the eBands boundary table and nbEBands
 *   end             number of bands to process
 *   X_, Y_          channel buffers; band i starts at eBands[i] << 3
 *   collapse_masks  two bytes per band: [2*i] and [2*i + 1] are written
 *   bit_pulse       per-band bit allotment
 *   shortBlocks     non-zero selects B = 8, otherwise B = 1
 *   spread          stored in the band context; also tested against
 *                   THREE_SPR when deciding how to seed the masks
 *   twoch_dual      non-zero: channels are coded independently until band
 *                   index `intensity` is reached
 *   intensity       band index at which independent coding is switched off
 *   tf_res          per-band value copied to the context's tf_change
 *   total_bits      overall budget, compared against ec_tell_frac()
 *                   (presumably in the same fractional-bit unit - confirm)
 *   x_bal           running balance carried from band to band
 *   ec              entropy coder context
 *   band_cut        bands with index >= band_cut get a zero budget
 *   matrix_seed     in/out: seed loaded into the context on entry and
 *                   written back on exit
 */
void all_subband_quantization(const AUDIOMode *mode, short end,
	short *X_, short *Y_, unsigned char *collapse_masks, short *bit_pulse,
	int shortBlocks, int spread, int twoch_dual, int intensity, short *tf_res,
	int total_bits, int x_bal, ec_ctx *ec, int band_cut, unsigned int *matrix_seed
	)
{
   const short *eBands = mode->eBands;
   const int M = 8;
   const int B = shortBlocks ? M : 1;

   /* Per-channel norm buffers live at the start of the shared scratch
      array; the second one begins where the last band starts. */
   short *first_norm = SSC_restricted_local;
   short *second_norm = first_norm + M*eBands[mode->nbEBands-1];

   /* Scratch handed to the quantizers: fixed offset 1728 in the shared
      array (presumably just past the coefficient area - confirm). */
   short *smallnumband_scratch = &(SSC_restricted_local[1728]);

   struct band_ctx ssc_matrix;
   int fold_band = 0;          /* band index the fold source is anchored at */
   short refresh_fold = 1;     /* previous band got more than N << BITRES   */
   int i;

   ssc_matrix.m = mode;
   ssc_matrix.ec = ec;
   ssc_matrix.spread = spread;
   ssc_matrix.intensity = intensity;
   ssc_matrix.seed = *matrix_seed;

   for (i = 0; i < end; i++)
   {
      const int step = eBands[i]<<3;
      const int N = (eBands[i+1]<<3) - step;
      const int last = (i == end-1);
      short *X = X_ + step;
      short *Y = Y_ + step;
      unsigned char *band_masks = collapse_masks + (i<<1);
      short effective_smallnumband = -1;
      short *fold_src_x;
      short *norm_out_x;
      unsigned x_central_mode;
      unsigned y_central_mode;
      int tf_change;
      int good_bits;
      int tell;
      int b = 0;

      ssc_matrix.i = i;

      tell = ec_tell_frac(ec);

      /* The balance is charged with the coder position on every band
         except the first one. */
      if (i != 0)
         x_bal -= tell;

      good_bits = total_bits - tell - 1;
      ssc_matrix.remaining_bits = good_bits;

      /* Budget for this band: its own allotment plus a share of the
         balance spread over at most the next three coded bands, clamped
         to [0, 16383] and to what is still available. */
      if (i < band_cut)
      {
         const int share = x_bal / IMIN(3, band_cut-i);
         int budget = IMIN(good_bits+1, bit_pulse[i]+share);

         budget = IMIN(16383, budget);
         b = IMAX(0, budget);
      }

      /* Move the fold anchor to this band when at least N samples precede
         it and either the previous band was well funded or no anchor has
         been chosen yet. */
      if (step-N >= 0 && (refresh_fold || fold_band == 0))
         fold_band = i;

      tf_change = tf_res[i];
      ssc_matrix.tf_change = tf_change;

      /* The final band is quantized without scratch space. */
      if (last)
         smallnumband_scratch = NULL;

      if (fold_band == 0 || (spread == THREE_SPR && B <= 1 && tf_change >= 0))
      {
         /* No fold region in use: start from an all-ones mask over the
            B blocks. */
         x_central_mode = y_central_mode = (1<<B)-1;
      }
      else
      {
         short lo = fold_band;
         short hi = fold_band-1;
         short k;

         /* Source region: the N samples ending at the anchor, never
            starting before sample 0. */
         effective_smallnumband = IMAX(0, M*eBands[fold_band]-N);

         /* Widen [lo, hi) until it spans every band touching the region. */
         do {
            lo--;
         } while (M*eBands[lo] > effective_smallnumband);

         do {
            hi++;
         } while (M*eBands[hi] < effective_smallnumband+N);

         /* lo <= fold_band-1 and hi >= fold_band here, so the range is
            never empty. */
         x_central_mode = 0;
         y_central_mode = 0;
         for (k = lo; k < hi; k++)
         {
            x_central_mode |= collapse_masks[(k<<1)];
            y_central_mode |= collapse_masks[(k<<1)+1];
         }
      }

      /* On reaching the intensity band, leave dual coding for good and
         fold the two norm histories into the first one. */
      if (twoch_dual && i == intensity)
      {
         short j;

         twoch_dual = 0;
         for (j = 0; j < step; j++)
            first_norm[j] = SSC_HLF32(first_norm[j]+second_norm[j]);
      }

      fold_src_x = (effective_smallnumband != -1) ? first_norm+effective_smallnumband : NULL;
      norm_out_x = last ? NULL : first_norm+step;

      if (twoch_dual)
      {
         /* Independent coding: each channel gets half the budget and its
            own norm history. */
         short *fold_src_y = (effective_smallnumband != -1) ? second_norm+effective_smallnumband : NULL;
         short *norm_out_y = last ? NULL : second_norm+step;

         x_central_mode = subband_quantization(&ssc_matrix, X, N, (b>>1), B,
                                               fold_src_x, norm_out_x,
                                               32767, smallnumband_scratch, x_central_mode);
         y_central_mode = subband_quantization(&ssc_matrix, Y, N, (b>>1), B,
                                               fold_src_y, norm_out_y,
                                               32767, smallnumband_scratch, y_central_mode);
      }
      else
      {
         /* Joint coding: one call handles both channels and both mask
            slots receive the same result. */
         x_central_mode = two_channel_quant_band(&ssc_matrix, X, Y, N, b, B,
                                                 fold_src_x, norm_out_x,
                                                 smallnumband_scratch,
                                                 x_central_mode|y_central_mode);
         y_central_mode = x_central_mode;
      }

      band_masks[0] = (unsigned char)x_central_mode;
      band_masks[1] = (unsigned char)y_central_mode;

      x_bal += bit_pulse[i] + tell;

      /* Only let the anchor advance next time if this band's budget
         exceeded N << BITRES. */
      refresh_fold = b > (N<<BITRES);
   }

   *matrix_seed = ssc_matrix.seed;
}

