/*====================================================================================
    EVS Codec 3GPP TS26.443 Jun 30, 2015. Version CR 26.443-0006
  ====================================================================================*/

/* NOTE(review): the next two #include directives carry no header name in this copy of
 * the file; the names appear to have been lost. Code further down in this file calls
 * abs(), sqrt(), pow(), exp() and log(), so the missing names are presumably the
 * matching system headers - TODO confirm against the reference source. */
#include
#include
#include "options.h"
#include "cnst.h"
#include "prot.h"
#include "rom_com.h"

/*-----------------------------------------------------------------*
 * Local functions
 *
 * Forward declarations of the helpers that are private to this file.
 *-----------------------------------------------------------------*/

static void create_random_vector( float output[], const short length, short seed[] );
static void flip_spectrum( const float input[], float output[], const short length );
static void Hilbert_transform( float tmp_R[], float tmp_I[], float *tmpi_R, float *tmpi_I, const short length, const short HB_stage_id );

/*-------------------------------------------------------------------*
 * swb_tbe_reset()
 *
 * Reset the SWB TBE encoder
 *
 * Every buffer argument is handed to set_f() with a value of zero and
 * an element count; the two (de-/pre-)emphasis scalars are set to 0.0f
 * and *gain_prec_swb is set to 1.0f.
 * NOTE(review): set_f() is declared elsewhere; it is assumed here to
 * fill the given number of floats with the given value - confirm.
 *-------------------------------------------------------------------*/

void swb_tbe_reset(
    float mem_csfilt[],                     /* o : set_f() called with count 2                          */
    float mem_genSHBexc_filt_down_shb[],    /* o : set_f() called with count 2*ALLPASSSECTIONS_STEEP+1  */
    float state_lpc_syn[],                  /* o : set_f() called with count LPC_SHB_ORDER              */
    float syn_overlap[],                    /* o : set_f() called with count L_SHB_LAHEAD               */
    float state_syn_shbexc[],               /* o : set_f() called with count L_SHB_LAHEAD               */
    float *tbe_demph,                       /* o : set to 0.0f                                          */
    float *tbe_premph,                      /* o : set to 0.0f                                          */
    float mem_stp_swb[],                    /* o : set_f() called with count LPC_SHB_ORDER              */
    float *gain_prec_swb                    /* o : set to 1.0f                                          */
)
{
    set_f( mem_csfilt, 0, 2);
    set_f( mem_genSHBexc_filt_down_shb, 0.0f, (2*ALLPASSSECTIONS_STEEP+1) );
    set_f( state_lpc_syn, 0.0f, LPC_SHB_ORDER );
    set_f( syn_overlap, 0.0f, L_SHB_LAHEAD );
    set_f( state_syn_shbexc, 0.0f, L_SHB_LAHEAD );
    *tbe_demph = 0.0f;
    *tbe_premph = 0.0f;
    set_f(mem_stp_swb, 0, LPC_SHB_ORDER);

    /* the only state that is not reset to zero */
    *gain_prec_swb = 1.0f;

    return;
}

/*-------------------------------------------------------------------*
 * swb_tbe_reset_synth()
 *
 * Reset the extra parameters needed for synthesis of the SWB TBE output
 *
 * Calls set_f() with 0.0f on both buffers: HILBERT_MEM_SIZE elements
 * for the first one and 2 * L_FILT16k elements for the second one.
 *-------------------------------------------------------------------*/

void swb_tbe_reset_synth(
    float genSHBsynth_Hilbert_Mem[],                /* o : set_f() called with count HILBERT_MEM_SIZE */
    float genSHBsynth_state_lsyn_filt_shb_local[]   /* o : set_f() called with count 2 * L_FILT16k    */
)
{
    set_f( genSHBsynth_Hilbert_Mem, 0.0f, HILBERT_MEM_SIZE );
    set_f( genSHBsynth_state_lsyn_filt_shb_local, 0.0f, 2 * L_FILT16k );

    return;
}

/*-------------------------------------------------------------------*
 * tbe_celp_exc_offset()
 *
 * Compute tbe bwe celp excitation offset
 *
 * Returns T0 * HIBND_ACB_L_FAC plus the quarter-scaled fractional part
 * (T0_frac * 0.25 * HIBND_ACB_L_FAC) converted to an integer.
 *-------------------------------------------------------------------*/

short tbe_celp_exc_offset(
    const short T0,         /* i : Integer pitch */
    const short T0_frac     /* i : Fractional part of the pitch */
)
{
    short offset;

    /* A bias of 2 * HIBND_ACB_L_FAC is added before the (int) truncation and removed
     * again afterwards; together with the + 0.5f this presumably makes the truncation
     * act as round-to-nearest even when T0_frac is negative - TODO confirm the valid
     * range of T0_frac. */
    offset = T0 * HIBND_ACB_L_FAC
             + (int) ((float) T0_frac * 0.25f * HIBND_ACB_L_FAC + 2 * HIBND_ACB_L_FAC + 0.5f)
             - 2 * HIBND_ACB_L_FAC;

    return offset;
}

/*-------------------------------------------------------------------*
 * flip_and_downmix_generic()
 *
 * flips the spectrum and downmixes the signals, lpf if needed
 *
 * NOTE(review): the middle of this function is damaged in this copy of
 * the file (see the marked statement below). The code that fills the
 * local sine/cosine tables, tmp_R[] and tmp_I[] is not visible, and
 * several declared locals (tmp, tmpi_R, tmpi_I, tmpi2_R, tmpi2_I, k,
 * recip_period) have no visible use. Restore from the reference source.
 *-------------------------------------------------------------------*/

void flip_and_downmix_generic(
    float input[],          /* i : input spectrum */
    float output[],         /* o : output spectrum */
    const short length,     /* i : length of spectra */
    float mem1_ext[],       /* i/o: Hilbert filter memory */
    float mem2_ext[],       /* i/o: memory */
    float mem3_ext[],       /* i/o: memory */
    short *phase_state      /* i/o: Phase state in case frequency isn't multiple of 50 Hz */
)
{
    short i, j;
    float tmp[L_FRAME32k + HILBERT_ORDER1];
    float tmpi_R[L_FRAME32k];
    float tmpi_I[L_FRAME32k];
    float tmpi2_R[L_FRAME32k + HILBERT_ORDER2];
    float tmpi2_I[L_FRAME32k + HILBERT_ORDER2];
    float tmp_R[L_FRAME32k + HILBERT_ORDER2];
    float tmp_I[L_FRAME32k + HILBERT_ORDER2];
    short k, period;
    float recip_period;
    float local_negsin_table[L_FRAME16k];
    float local_cos_table[L_FRAME16k];

    period = 17;    /* == (short) (32000.0f / 1850.0f + 0.5f); */

    recip_period = 256.0f / (float) period;

    /* NOTE(review): the statement below is truncated - the text between "i" and
     * "= period )" is missing in this copy. What remains resets *phase_state to 0. */
    for( i=0; i= period )
    {
        *phase_state = 0;
    }

    /* Mix the two components with the local cosine / negative-sine tables.
     * j is the table index: it starts at *phase_state, advances together with i and
     * wraps back to 0 once it reaches period. */
    for( i=0, j=*phase_state; i < length; )
    {
        for( ; (j < period) && (i < length); j++, i++ )
        {
            output[i] = tmp_R[i + HILBERT_ORDER2] * local_cos_table[j] + tmp_I[i + HILBERT_ORDER2] * local_negsin_table[j];
        }

        if( j >= period )
        {
            j = 0;
        }
    }

    /* hand the table index back to the caller so that the next call continues from it */
    *phase_state = j;

    return;
}
/*----------------------------------------------
 * Hilbert_transform()
 *
 * Hilbert transform
 *
 * NOTE(review): only the start of this function survives in this copy
 * of the file. The text is cut inside the first for() statement and
 * resumes in the middle of a different function.
 *------------------------------------------------*/

static void Hilbert_transform(
    float tmp_R[],              /* i: Real component of HB */
    float tmp_I[],              /* i: Real component of HB */
    float tmpi_R[],             /* o: Real component of HB */
    float tmpi_I[],             /* o: Imag. component of HB */
    const short length,         /* i: input length */
    const short HB_stage_id     /* i: HB transform stage */
)
{
    short i, hb_filter_stage, offset;

    /* the filter stage index is twice the stage id; offset is 1 for stage 0 only */
    hb_filter_stage = 2*HB_stage_id;
    offset = (HB_stage_id == 0) ? 1 : 0;

    if (HB_stage_id == 0 || HB_stage_id == 2)
    {
        /* NOTE(review): the statement below is truncated - everything between "i" and
         * "0.35f) )" is missing in this copy. The code that follows uses names
         * (csfilt_num2, igf_flag, exc4k, LPC_WHTN_ORDER_WB, ...) that are not declared
         * in Hilbert_transform(), so it belongs to another function whose header is
         * not visible here. Restore from the reference source. */
        for( i=0; i 0.35f) )
        {
            csfilt_num2[0] = 0.2f;
            csfilt_den2[1] = -0.8f;
        }
        else if( igf_flag && (coder_type == UNVOICED || avg_voice_fac < 0.2f) )
        {
            csfilt_num2[0] = 0.01f;
            csfilt_den2[1] = -0.99f;
        }

        set_f( wht_fil_mem, 0, LPC_WHTN_ORDER_WB );

        /* decimate the extended excitation, then flip / decimate again into exc4k */
        Decimate_allpass_steep( bwe_exc_extended, mem_genSHBexc_filt_down1, L_FRAME32k, excTmp );
        flip_spectrum_and_decimby4( excTmp, exc4k, L_FRAME16k, mem_genSHBexc_filt_down2, mem_genSHBexc_filt_down3, 0 );

        if( uv_flag )
        {
            /* unvoiced signal */
            create_random_vector( exc4kWhtnd, L_FRAME16k/4, bwe_seed );
        }
        else
        {
            /* autocorrelation -> windowing by wac[] -> lev_dur() -> fir() of exc4k */
            autocorr(exc4k, R, LPC_WHTN_ORDER_WB+1, L_FRAME16k/4, win_flatten_4k, 0, 1, 1);

            /* Ensure R[0] isn't zero when entering Levinson Durbin */
            R[0] = max(R[0], 1.0e-8f);
            for( i = 0; i <= LPC_WHTN_ORDER_WB; i++ )
            {
                R[i] = R[i] * wac[i];
            }
            lev_dur( lpc_whtn, R, LPC_WHTN_ORDER_WB, ervec );

            fir( exc4k, lpc_whtn, exc4kWhtnd, wht_fil_mem, L_FRAME16k/4, LPC_WHTN_ORDER_WB, 0);

            /* Ensure pow1 is greater than zero when computing normalization */
            /* NOTE(review): the statement below is truncated - everything between "i" and
             * "1.0f)" is missing in this copy. What follows is the tail of a function that
             * limits formant_fac to [0, 1], maps it to 1 - 0.5 * formant_fac and returns it;
             * the header of that function is not visible. Restore from the reference source. */
            for( i=0, pow1=0.00001f; i 1.0f)
            {
                formant_fac = 1.0f;
            }
            else if (formant_fac < 0.0f)
            {
                formant_fac = 0.0f;
            }

            formant_fac = 1.0f - 0.5f*formant_fac;

            return formant_fac;
}

/*-------------------------------------------------------------------*
 * GenShapedSHBExcitation()
 *
 * Synthesize spectrally shaped highband excitation signal
 *
 * NOTE(review): this function is damaged in three places in this copy
 * of the file (each one is marked below) and its end is not visible;
 * the text runs into the tail of a different function.
 *-------------------------------------------------------------------*/

void GenShapedSHBExcitation(
    float *excSHB,                      /* o : synthesized shaped shb excitation */
    const float *lpc_shb,               /* i : lpc coefficients */
    float *White_exc16k_FB,             /* o : white excitation for the Fullband extension */
    float *mem_csfilt,                  /* i/o: memory */
    float *mem_genSHBexc_filt_down_shb,/* i/o: memory */
    float *state_lpc_syn,               /* i/o: memory */
    const short coder_type,             /* i : coding type */
    const float *bwe_exc_extended,      /* i : bandwidth extended excitation */
    short bwe_seed[],                   /* i/o: random number generator seed */
    float voice_factors[],              /* i : voicing factor*/
    const short extl,                   /* i : extension layer */
    float *tbe_demph,                   /* i/o: de-emphasis memory */
    float *tbe_premph,                  /* i/o: pre-emphasis memory */
    float *lpc_shb_sf,                  /* i: LP coefficients */
    float *shb_ener_sf,
    float *shb_res_gshape,
    float *shb_res,
    short *vf_ind,
    const float formant_fac,            /* i : Formant sharpening factor [0..1] */
    float fb_state_lpc_syn[],           /* i/o: memory */
    float *fb_tbe_demph,                /* i/o: fb de-emphasis memory */
    const long bitrate,                 /* i : bitrate */
    const short prev_bfi                /* i : previous frame was concealed */
)
{
    short i, j, k;
    float wht_fil_mem[LPC_WHTN_ORDER];
    float lpc_whtn[LPC_WHTN_ORDER + 1];
    float R[LPC_WHTN_ORDER + 2];
    float exc32k[L_FRAME32k], exc16k[L_FRAME16k];
    float pow1, pow2, scale, temp1, temp2;
    float excTmp2[L_FRAME16k];
    short nbSubFr;
    float excNoisyEnv[ L_FRAME16k];
    float csfilt_num2[1] = {0.2f};
    float csfilt_den2[2] = {1.0f, -0.8f};
    float varEnvShape;
    float ervec[LPC_WHTN_ORDER+2];
    float exc16kWhtnd[L_FRAME16k];
    float temp = 0.0f;
    float *White_exc16k;
    float voiceFacEst[NB_SUBFR16k];
    float syn_shb_ener_sf[4], tempSHB[80];
    float zero_mem[LPC_SHB_ORDER];
    float vf_tmp;
    float White_exc16k_FB_temp[L_FRAME16k];
    float fb_deemph_fac = 0.48f;

    set_f( zero_mem, 0, LPC_SHB_ORDER);
    set_f( wht_fil_mem, 0, LPC_WHTN_ORDER );

    /* copy the input with the sign of every even-indexed sample inverted */
    for(i = 0; i < L_FRAME32k; i++)
    {
        exc32k[i] = ((i%2)==0)?(-bwe_exc_extended[i]):(bwe_exc_extended[i]);
    }

    /* Decimate by 2 */
    Decimate_allpass_steep( exc32k, mem_genSHBexc_filt_down_shb, 2*L_FRAME16k, exc16k );

    /* autocorrelation -> windowing by wac[] -> lev_dur() -> fir() of exc16k */
    autocorr( exc16k, R, LPC_WHTN_ORDER+1, L_FRAME16k, win_flatten, 0, 1, 1 );

    /* Ensure R[0] isn't zero when entering Levinson-Durbin */
    R[0] = max(R[0], 1.0e-8f);
    for( i = 0; i <= LPC_WHTN_ORDER; i++ )
    {
        R[i] = R[i] * wac[i];
    }

    /* Ensure R[0] isn't zero when entering Levinson-Durbin */
    R[0] += 1.0e-8f;
    lev_dur( lpc_whtn, R, LPC_WHTN_ORDER, ervec );

    fir( exc16k, lpc_whtn, exc16kWhtnd, wht_fil_mem, L_FRAME16k, LPC_WHTN_ORDER, 0 );

    /* from ACELP_24k40 upwards every run of 80 samples is scaled by its own shb_res_gshape[] entry */
    if( bitrate >= ACELP_24k40 )
    {
        for(i = 0; i < L_FRAME16k; i++)
        {
            exc16kWhtnd[i] *= shb_res_gshape[(short)(i/80)];
        }
    }

    /* NOTE(review): the statement below is truncated - everything between "k" and
     * "= ACELP_7k20)" is missing in this copy. Restore from the reference source. */
    for( k=0, pow1=0.00001f; k= ACELP_7k20)
    {
        varEnvShape = mean(voice_factors, 4);
    }
    else
    {
        varEnvShape = mean(voice_factors, 5);
    }

    /* for FB_TBE the de-emphasis factor is raised above its 0.48 default when varEnvShape^3 is small */
    if ( extl == FB_TBE)
    {
        fb_deemph_fac = max((0.68f - (float)pow(varEnvShape, 3)), 0.48f);
    }

    /* map the mean voicing factor linearly, limit the result to [0.6, 0.999] and
     * derive the one-tap filter coefficients from it */
    varEnvShape = 1.09875f - 0.49875f * varEnvShape;
    varEnvShape = min( max(varEnvShape, 0.6f), 0.999f);
    csfilt_num2[0] = 1.0f - varEnvShape;
    csfilt_den2[1] = - varEnvShape;

    if (*mem_csfilt == 0 && ( bitrate == ACELP_9k60 || bitrate == ACELP_16k40 || bitrate == ACELP_24k40 ) )
    {
        /* pre-init smoothing avoid energy drop outs */
        float tmp_scale = 0;

        /* NOTE(review): the statement below is truncated - everything between "i" and
         * "= ACELP_24k40 )" is missing in this copy. Restore from the reference source. */
        for (i=0; i= ACELP_24k40 )
        {
            /* temp is 1.0 when the (estimated or decoded) voicing value exceeds 0.7, otherwise 0.8 */
            if( *vf_ind == 20 ) /* encoder side */
            {
                Estimate_mix_factors( shb_res, exc16kWhtnd, White_exc16k, pow1, pow2, voiceFacEst, vf_ind );
                temp = (voiceFacEst[0] > 0.7f)? 1.0f : 0.8f;
            }
            else /* decoder side */
            {
                temp = ((*vf_ind * 0.125f) > 0.7f)? 1.0f : 0.8f;
            }

            for(i = 0; i < NB_SUBFR16k; i++)
            {
                voice_factors[i] *= temp;
            }
        }

        /* keep an un-de-emphasised copy for the fullband path before de-emphasis */
        mvr2r( White_exc16k, White_exc16k_FB, L_FRAME16k );

        deemph( White_exc16k, PREEMPH_FAC, L_FRAME16k, tbe_demph );

        if( coder_type == UNVOICED )
        {
            scale = sqrt( pow1/pow2 );
            if ((pow2)==0) scale = 0;

            /* NOTE(review): the statement below is truncated - everything between "k" and
             * "ths )" is missing in this copy. The code that follows uses names (ths,
             * en_abs, p_out, input, output, old_bwe_exc_extended, prev_scale, length,
             * max, scale_step) that are not declared in GenShapedSHBExcitation(), so it
             * belongs to another function whose header is not visible here. Restore
             * from the reference source. */
            for( k=0; k ths )
            {
                en_abs = 1;
            }

            p_out = output + NL_BUFF_OFFSET; /* NL_BUFF_OFFSET = 12 */

            /* update buffer memory */
            mvr2r( old_bwe_exc_extended, output, NL_BUFF_OFFSET );

            /* ---- first half: the loop appears to track the largest value in max and its index in j ---- */
            /* NOTE(review): truncated statement - text between "i" and "max)" is missing. */
            for (i=j=0; i max)
            {
                max = temp;
                j = i;
            }
}

/* target scale: 0.67, reduced to 0.67 / max when max exceeds 1 */
if (max > 1.0f)
{
    scale = 0.67f / max;
}
else
{
    scale = 0.67f;
}

/* With no usable previous scale (<= 0) or one more than 1024 times the target, jump
 * straight to the target. Otherwise scale_step is (scale / *prev_scale)^(1/j), i.e.
 * the per-sample factor that moves *prev_scale to scale over j samples. */
if ( *prev_scale <= 0.0 || *prev_scale > 1024.0f * scale )
{
    scale_step = 1.0;
    *prev_scale = scale;
}
else
{
    scale_step = 1.0f;
    if(j != 0)
    {
        scale_step = (float) exp(1.0f / (float) j * (float) log(scale / *prev_scale));
    }
}

/* NOTE(review): truncated statement - text between "i" and "= 0.0)" is missing. */
for (i=0; i= 0.0)
{
    *p_out++ = (input[i] * input[i]) **prev_scale;
}
else
{
    /* this branch writes the squared sample with a positive sign when en_abs is set,
     * with a negative sign otherwise */
    if (en_abs)
    {
        *p_out++ = 1.0f * (input[i] * input[i]) **prev_scale;
    }
    else
    {
        *p_out++ = -1.0f * (input[i] * input[i]) **prev_scale;
    }
}

/* the scale is only stepped while i is below the index stored in j */
if (i < j)
{
    *prev_scale *= scale_step;
}
}

/* ---- second half: same procedure, starting at length/2 ---- */
max = 0.0f;
/* NOTE(review): truncated statement - text between "i" and "max)" is missing. */
for (i=j=length/2; i max)
{
    max = temp;
    j = i;
}
}

if (max > 1.0f)
{
    scale = 0.67f / max;
}
else
{
    scale = 0.67f;
}

if ( *prev_scale <= 0.0 || *prev_scale > 1024.0f * scale )
{
    scale_step = 1.0;
    *prev_scale = scale;
}
else
{
    scale_step = 1.0f;
    if(j != length/2)
    {
        /* same per-sample factor as above, spread over (j - length/2) samples */
        scale_step = (float) exp(1.0f / (float) (j - length/2) * (float) log(scale / *prev_scale));
    }
}

/* NOTE(review): truncated statement - text between "i" and "= 0.0)" is missing. */
for (i=length/2; i= 0.0)
{
    *p_out++ = (input[i] * input[i]) **prev_scale;
}
else
{
    if (en_abs)
    {
        *p_out++ = 1.0f * (input[i] * input[i]) **prev_scale;
    }
    else
    {
        *p_out++ = -1.0f * (input[i] * input[i]) **prev_scale;
    }
}

if (i < j)
{
    *prev_scale *= scale_step;
}
}

/* update buffer memory */
mvr2r( output + L_FRAME32k, old_bwe_exc_extended, NL_BUFF_OFFSET );

return;
}

/*-------------------------------------------------------------------*
 * create_random_vector()
 *
 * creates random number vector
 *
 * NOTE(review): the final loop of this function is cut off in this
 * copy of the file; the text resumes in the tail of another function.
 * -------------------------------------------------------------------*/

void create_random_vector(
    float output[],         /* o : output random vector */
    const short length,     /* i : length of random vector */
    short seed[]            /* i/o: start seed */
)
{
    short i, j, k;
    float scale1, scale2;

    /* derive two different values in 0..255 from the two seeds: scale the random
     * number by 0.0078, take the absolute value and keep the low 8 bits */
    j = (short) (own_random(&seed[0]) * 0.0078f);
    j = abs(j) & 0xff;
    k = (short) (own_random(&seed[1]) * 0.0078f);
    k = abs(k) & 0xff;

    /* draw k again until it differs from j */
    while( k==j )
    {
        k = (short)(own_random(&seed[1]) * 0.0078f);
        k = abs(k) & 0xff;
    }

    /* the sign of each scale factor is taken from a further random draw */
    if( own_random(&seed[0]) < 0 )
    {
        scale1 = -563.154f; /* -200.00f * 0.35f/0.1243f; */
    }
    else
    {
        scale1 = 563.154f; /* 200.00f * 0.35f/0.1243f; */
    }

    if( own_random(&seed[1]) < 0 )
    {
        scale2 = -225.261f; /* -80.00f * 0.35f/0.1243f; */
    }
    else
    {
        scale2 = 225.261f; /* 80.00f * 0.35f/0.1243f; */
    }

    /* NOTE(review): the statement below is truncated - everything between "i" and
     * "115.5f)" is missing in this copy. The code that follows uses names (T0,
     * core_brate, voice_factors, L_frame, code, bwe_exc, gain_pit, gain_code, ...)
     * that are not declared in create_random_vector(), so it belongs to another
     * function whose header is not visible here. Restore from the reference source. */
    for( i=0; i 115.5f) && core_brate > ACELP_8k00 )
    {
        /* piecewise factor in T0: a falling line up to 57.75, a rising line limited to
         * 1.0 between 57.75 and 115.5, and 1.0 from 115.5 upwards */
        if(T0 <= 57.75f)
        {
            tmp = -0.0126f*T0 + 1.23f;
        }
        else if(T0 > 57.75f && T0 < 115.5f)
        {
            tmp = min(0.0087f*T0, 1.0f);
        }
        else if (T0 >= 115.5f)
        {
            tmp = 1.0f;
        }

        *voice_factors *= tmp;
    }

    /* keep the voicing factor strictly inside (0, 1) */
    *voice_factors = min( max(0.000001f, *voice_factors), 0.999999f);

    if( L_frame == L_FRAME )
    {
        /* interpolate the code vector with interp_code_5over2() and add it, scaled by
         * gain_code, to the pitch-gain-scaled excitation already present in bwe_exc[] */
        interp_code_5over2( code, tmp_code, L_SUBFR );

        for( i = 0; i < L_SUBFR * HIBND_ACB_L_FAC; i++ )
        {
            bwe_exc[i + i_subfr * HIBND_ACB_L_FAC] = gain_pit * bwe_exc[i + i_subfr * HIBND_ACB_L_FAC] + gain_code * tmp_code[i];
        }
    }
    else
    {
        /* here the gains are applied before interpolation, and the pre-quantizer
         * contribution (weighted by 2 * gain_preQ) is included */
        for( i = 0; i < L_SUBFR; i++ )
        {
            tmp_code_preInt[i] = gain_code * code[i] + 2 * gain_preQ * code_preQ[i];
        }

        interp_code_4over2( tmp_code_preInt, tmp_code, L_SUBFR );

        for( i = 0; i < L_SUBFR * 2; i++ )
        {
            bwe_exc[i + i_subfr * 2] = gain_pit * bwe_exc[i + i_subfr * 2] + tmp_code[i];
        }
    }

    return;
}

/*-------------------------------------------------------------------*
 * get_tbe_bits()                                                    *
 *                                                                   *
 * Determine TBE bit consumption per frame from bit rate             *
 *                                                                   *
 * Returns 0 for every bitrate / bandwidth / rf_mode combination     *
 * that is not matched by one of the branches below.                 *
 *-------------------------------------------------------------------*/

short get_tbe_bits(
    short bitrate,      /* i : compared against the ACELP_* rate constants */
    short bandwidth,    /* i : compared against WB, SWB and FB             */
    short rf_mode       /* i : non-zero selects the first branch below     */
)
{
    short i, bits = 0;

    if( rf_mode )
    {
        /* TBE bits for core, primary frame */
        if( bandwidth == WB && bitrate == ACELP_13k20 )
        {
            /* Gain frame: 4, Gain shapes: 0, and LSFs: 2 */
            bits = NUM_BITS_SHB_FrameGain_LBR_WB + NUM_BITS_LBR_WB_LSF;
        }
        else if( bandwidth == SWB && bitrate == ACELP_13k20 )
        {
            /* Gain frame: 5, Gain shapes: 5, and lowrate LSFs: 8 */
            bits = NUM_BITS_SHB_FRAMEGAIN + NUM_BITS_SHB_SUBGAINS + 8;
        }
    }
    else
    {
        if( bandwidth == WB && bitrate == ACELP_9k60 )
        {
            bits = NUM_BITS_LBR_WB_LSF + NUM_BITS_SHB_FrameGain_LBR_WB;
        }
        else if( bandwidth == SWB || bandwidth == FB )
        {
            if( bitrate == ACELP_9k60 )
            {
                bits = NUM_BITS_SHB_FRAMEGAIN + NUM_BITS_SHB_SUBGAINS + 8;
            }
            else if( bitrate >= ACELP_13k20 && bitrate <= ACELP_32k )
            {
                bits = NUM_BITS_SHB_SUBGAINS + NUM_BITS_SHB_FRAMEGAIN + NUM_LSF_GRID_BITS + MIRROR_POINT_BITS;

                /* NOTE(review): the statement below is truncated - everything between
                 * "i" and "= ACELP_24k40 )" is missing in this copy (presumably a loop
                 * that adds further bits, followed by the start of a bitrate test).
                 * Restore from the reference source. */
                for( i=0; i= ACELP_24k40 )
                {
                    bits += NUM_BITS_SHB_ENER_SF + NUM_BITS_SHB_VF + NUM_BITS_SHB_RES_GS*NB_SUBFR16k;
                }

                /* extra bits for SWB at 16.4 and 24.4 kbps */
                if( bandwidth == SWB && (bitrate == ACELP_16k40 || bitrate == ACELP_24k40) )
                {
                    bits += BITS_TEC + BITS_TFA;
                }

                if( bandwidth == FB )
                {
                    /* fullband slope */
                    bits += 4;
                }
            }
        }

        return bits;
    }