aacdec: Implement LTP support.
Ported from gsoc svn. (cherry picked from commit ead15f1dc196ad164d105e31c8c9025f8a4ee4e7)
This commit is contained in:
		
							parent
							
								
									d3c4829a0d
								
							
						
					
					
						commit
						ece6cca14a
					
				| @ -43,6 +43,7 @@ | ||||
| #define MAX_ELEM_ID 16 | ||||
| 
 | ||||
| #define TNS_MAX_ORDER 20 | ||||
| #define MAX_LTP_LONG_SFB 40 | ||||
| 
 | ||||
| enum RawDataBlockType { | ||||
|     TYPE_SCE, | ||||
| @ -130,6 +131,16 @@ typedef struct { | ||||
| #define SCALE_MAX_DIFF   60    ///< maximum scalefactor difference allowed by standard
 | ||||
| #define SCALE_DIFF_ZERO  60    ///< codebook index corresponding to zero scalefactor indices difference
 | ||||
| 
 | ||||
| /**
 | ||||
|  * Long Term Prediction | ||||
|  */ | ||||
| typedef struct { | ||||
|     int8_t present; | ||||
|     int16_t lag; | ||||
|     float coef; | ||||
|     int8_t used[MAX_LTP_LONG_SFB]; | ||||
| } LongTermPrediction; | ||||
| 
 | ||||
| /**
 | ||||
|  * Individual Channel Stream | ||||
|  */ | ||||
| @ -139,6 +150,7 @@ typedef struct { | ||||
|     uint8_t use_kb_window[2];   ///< If set, use Kaiser-Bessel window, otherwise use a sinus window.
 | ||||
|     int num_window_groups; | ||||
|     uint8_t group_len[8]; | ||||
|     LongTermPrediction ltp; | ||||
|     const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
 | ||||
|     const uint8_t *swb_sizes;   ///< table of scalefactor band sizes for a particular window
 | ||||
|     int num_swb;                ///< number of scalefactor window bands
 | ||||
| @ -214,6 +226,7 @@ typedef struct { | ||||
|     DECLARE_ALIGNED(16, float,   coeffs)[1024];     ///< coefficients for IMDCT
 | ||||
|     DECLARE_ALIGNED(16, float,   saved)[1024];      ///< overlap
 | ||||
|     DECLARE_ALIGNED(16, float,   ret)[2048];        ///< PCM output
 | ||||
|     DECLARE_ALIGNED(16, int16_t, ltp_state)[3072];  ///< time signal for LTP
 | ||||
|     PredictorState predictor_state[MAX_PREDICTORS]; | ||||
| } SingleChannelElement; | ||||
| 
 | ||||
| @ -259,7 +272,7 @@ typedef struct { | ||||
|      * @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.) | ||||
|      * @{ | ||||
|      */ | ||||
|     DECLARE_ALIGNED(16, float, buf_mdct)[1024]; | ||||
|     DECLARE_ALIGNED(16, float, buf_mdct)[2048]; | ||||
|     /** @} */ | ||||
| 
 | ||||
|     /**
 | ||||
| @ -268,6 +281,7 @@ typedef struct { | ||||
|      */ | ||||
|     FFTContext mdct; | ||||
|     FFTContext mdct_small; | ||||
|     FFTContext mdct_ltp; | ||||
|     DSPContext dsp; | ||||
|     FmtConvertContext fmt_conv; | ||||
|     int random_state; | ||||
|  | ||||
| @ -42,7 +42,7 @@ | ||||
|  * Y                    filterbank - standard | ||||
|  * N (code in SoC repo) filterbank - Scalable Sample Rate | ||||
|  * Y                    Temporal Noise Shaping | ||||
|  * N (code in SoC repo) Long Term Prediction | ||||
|  * Y                    Long Term Prediction | ||||
|  * Y                    intensity stereo | ||||
|  * Y                    channel coupling | ||||
|  * Y                    frequency domain prediction | ||||
| @ -478,6 +478,7 @@ static int decode_audio_specific_config(AACContext *ac, | ||||
|     switch (m4ac->object_type) { | ||||
|     case AOT_AAC_MAIN: | ||||
|     case AOT_AAC_LC: | ||||
|     case AOT_AAC_LTP: | ||||
|         if (decode_ga_specific_config(ac, avctx, &gb, m4ac, m4ac->chan_config)) | ||||
|             return -1; | ||||
|         break; | ||||
| @ -582,6 +583,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) | ||||
| 
 | ||||
|     ff_mdct_init(&ac->mdct,       11, 1, 1.0); | ||||
|     ff_mdct_init(&ac->mdct_small,  8, 1, 1.0); | ||||
|     ff_mdct_init(&ac->mdct_ltp,   11, 0, 1.0); | ||||
|     // window initialization
 | ||||
|     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024); | ||||
|     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128); | ||||
| @ -630,6 +632,20 @@ static int decode_prediction(AACContext *ac, IndividualChannelStream *ics, | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Decode Long Term Prediction data; reference: table 4.xx. | ||||
|  */ | ||||
| static void decode_ltp(AACContext *ac, LongTermPrediction *ltp, | ||||
|                        GetBitContext *gb, uint8_t max_sfb) | ||||
| { | ||||
|     int sfb; | ||||
| 
 | ||||
|     ltp->lag  = get_bits(gb, 11); | ||||
|     ltp->coef = ltp_coef[get_bits(gb, 3)] * ac->sf_scale; | ||||
|     for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++) | ||||
|         ltp->used[sfb] = get_bits1(gb); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Decode Individual Channel Stream info; reference: table 4.6. | ||||
|  * | ||||
| @ -684,9 +700,8 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, | ||||
|                 memset(ics, 0, sizeof(IndividualChannelStream)); | ||||
|                 return -1; | ||||
|             } else { | ||||
|                 av_log_missing_feature(ac->avctx, "Predictor bit set but LTP is", 1); | ||||
|                 memset(ics, 0, sizeof(IndividualChannelStream)); | ||||
|                 return -1; | ||||
|                 if ((ics->ltp.present = get_bits(gb, 1))) | ||||
|                     decode_ltp(ac, &ics->ltp, gb, ics->max_sfb); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| @ -1420,6 +1435,9 @@ static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe) | ||||
|         i = cpe->ch[1].ics.use_kb_window[0]; | ||||
|         cpe->ch[1].ics = cpe->ch[0].ics; | ||||
|         cpe->ch[1].ics.use_kb_window[1] = i; | ||||
|         if (cpe->ch[1].ics.predictor_present && (ac->m4ac.object_type != AOT_AAC_MAIN)) | ||||
|             if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1))) | ||||
|                 decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb); | ||||
|         ms_present = get_bits(gb, 2); | ||||
|         if (ms_present == 3) { | ||||
|             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n"); | ||||
| @ -1659,6 +1677,7 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns, | ||||
|     int w, filt, m, i; | ||||
|     int bottom, top, order, start, end, size, inc; | ||||
|     float lpc[TNS_MAX_ORDER]; | ||||
|     float tmp[TNS_MAX_ORDER]; | ||||
| 
 | ||||
|     for (w = 0; w < ics->num_windows; w++) { | ||||
|         bottom = ics->num_swb; | ||||
| @ -1684,12 +1703,116 @@ static void apply_tns(float coef[1024], TemporalNoiseShaping *tns, | ||||
|             } | ||||
|             start += w * 128; | ||||
| 
 | ||||
|             if (decode) { | ||||
|                 // ar filter
 | ||||
|                 for (m = 0; m < size; m++, start += inc) | ||||
|                     for (i = 1; i <= FFMIN(m, order); i++) | ||||
|                         coef[start] -= coef[start - i * inc] * lpc[i - 1]; | ||||
|             } else { | ||||
|                 // ma filter
 | ||||
|                 for (m = 0; m < size; m++, start += inc) { | ||||
|                     tmp[0] = coef[start]; | ||||
|                     for (i = 1; i <= FFMIN(m, order); i++) | ||||
|                         coef[start] += tmp[i] * lpc[i - 1]; | ||||
|                     for (i = order; i > 0; i--) | ||||
|                         tmp[i] = tmp[i - 1]; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  *  Apply windowing and MDCT to obtain the spectral | ||||
|  *  coefficient from the predicted sample by LTP. | ||||
|  */ | ||||
| static void windowing_and_mdct_ltp(AACContext *ac, float *out, | ||||
|                                    float *in, IndividualChannelStream *ics) | ||||
| { | ||||
|     const float *lwindow      = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; | ||||
|     const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; | ||||
|     const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; | ||||
|     const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; | ||||
| 
 | ||||
|     if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) { | ||||
|         ac->dsp.vector_fmul(in, in, lwindow_prev, 1024); | ||||
|     } else { | ||||
|         memset(in, 0, 448 * sizeof(float)); | ||||
|         ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128); | ||||
|         memcpy(in + 576, in + 576, 448 * sizeof(float)); | ||||
|     } | ||||
|     if (ics->window_sequence[0] != LONG_START_SEQUENCE) { | ||||
|         ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024); | ||||
|     } else { | ||||
|         memcpy(in + 1024, in + 1024, 448 * sizeof(float)); | ||||
|         ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128); | ||||
|         memset(in + 1024 + 576, 0, 448 * sizeof(float)); | ||||
|     } | ||||
|     ff_mdct_calc(&ac->mdct_ltp, out, in); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Apply the long term prediction | ||||
|  */ | ||||
| static void apply_ltp(AACContext *ac, SingleChannelElement *sce) | ||||
| { | ||||
|     const LongTermPrediction *ltp = &sce->ics.ltp; | ||||
|     const uint16_t *offsets = sce->ics.swb_offset; | ||||
|     int i, sfb; | ||||
| 
 | ||||
|     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { | ||||
|         float *predTime = ac->buf_mdct; | ||||
|         float *predFreq = sce->ret; | ||||
|         int16_t num_samples = 2048; | ||||
| 
 | ||||
|         if (ltp->lag < 1024) | ||||
|             num_samples = ltp->lag + 1024; | ||||
|         for (i = 0; i < num_samples; i++) | ||||
|             predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef; | ||||
|         memset(&predTime[i], 0, (2048 - i) * sizeof(float)); | ||||
| 
 | ||||
|         windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics); | ||||
| 
 | ||||
|         if (sce->tns.present) | ||||
|             apply_tns(predFreq, &sce->tns, &sce->ics, 0); | ||||
| 
 | ||||
|         for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++) | ||||
|             if (ltp->used[sfb]) | ||||
|                 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++) | ||||
|                     sce->coeffs[i] += predFreq[i]; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * Update the LTP buffer for next frame | ||||
|  */ | ||||
| static void update_ltp(AACContext *ac, SingleChannelElement *sce) | ||||
| { | ||||
|     IndividualChannelStream *ics = &sce->ics; | ||||
|     float *saved     = sce->saved; | ||||
|     float *saved_ltp = sce->coeffs; | ||||
|     const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; | ||||
|     const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; | ||||
|     int i; | ||||
| 
 | ||||
|     for (i = 0; i < 512; i++) | ||||
|         ac->buf_mdct[1535 - i] = ac->buf_mdct[512 + i]; | ||||
| 
 | ||||
|     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { | ||||
|         memcpy(saved_ltp,       saved, 512 * sizeof(float)); | ||||
|         memset(saved_ltp + 576, 0,     448 * sizeof(float)); | ||||
|         ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     swindow,     128); | ||||
|     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { | ||||
|         memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(float)); | ||||
|         memset(saved_ltp + 576, 0,                  448 * sizeof(float)); | ||||
|         ac->dsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     swindow,     128); | ||||
|     } else { // LONG_STOP or ONLY_LONG
 | ||||
|         ac->dsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     lwindow,     1024); | ||||
|     } | ||||
| 
 | ||||
|     memcpy(sce->ltp_state, &sce->ltp_state[1024], 1024 * sizeof(int16_t)); | ||||
|     ac->fmt_conv.float_to_int16(&(sce->ltp_state[1024]), sce->ret,  1024); | ||||
|     ac->fmt_conv.float_to_int16(&(sce->ltp_state[2048]), saved_ltp, 1024); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
| @ -1857,6 +1980,14 @@ static void spectral_to_sample(AACContext *ac) | ||||
|             if (che) { | ||||
|                 if (type <= TYPE_CPE) | ||||
|                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, apply_dependent_coupling); | ||||
|                 if (ac->m4ac.object_type == AOT_AAC_LTP) { | ||||
|                     if (che->ch[0].ics.predictor_present) { | ||||
|                         if (che->ch[0].ics.ltp.present) | ||||
|                             apply_ltp(ac, &che->ch[0]); | ||||
|                         if (che->ch[1].ics.ltp.present && type == TYPE_CPE) | ||||
|                             apply_ltp(ac, &che->ch[1]); | ||||
|                     } | ||||
|                 } | ||||
|                 if (che->ch[0].tns.present) | ||||
|                     apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1); | ||||
|                 if (che->ch[1].tns.present) | ||||
| @ -1865,8 +1996,12 @@ static void spectral_to_sample(AACContext *ac) | ||||
|                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, apply_dependent_coupling); | ||||
|                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) { | ||||
|                     imdct_and_windowing(ac, &che->ch[0]); | ||||
|                     if (ac->m4ac.object_type == AOT_AAC_LTP) | ||||
|                         update_ltp(ac, &che->ch[0]); | ||||
|                     if (type == TYPE_CPE) { | ||||
|                         imdct_and_windowing(ac, &che->ch[1]); | ||||
|                         if (ac->m4ac.object_type == AOT_AAC_LTP) | ||||
|                             update_ltp(ac, &che->ch[1]); | ||||
|                     } | ||||
|                     if (ac->m4ac.sbr > 0) { | ||||
|                         ff_sbr_apply(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret); | ||||
| @ -2080,6 +2215,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx) | ||||
| 
 | ||||
|     ff_mdct_end(&ac->mdct); | ||||
|     ff_mdct_end(&ac->mdct_small); | ||||
|     ff_mdct_end(&ac->mdct_ltp); | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -35,6 +35,14 @@ | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| 
 | ||||
| /* @name ltp_coef
 | ||||
|  * Table of the LTP coefficient (multiplied by 2) | ||||
|  */ | ||||
| static const float ltp_coef[8] = { | ||||
|      1.141658,    1.393232,    1.626008,    1.822608, | ||||
|      1.969800,    2.135788,    2.2389202,   2.739066, | ||||
| }; | ||||
| 
 | ||||
| /* @name tns_tmp2_map
 | ||||
|  * Tables of the tmp2[] arrays of LPC coefficients used for TNS. | ||||
|  * The suffix _M_N[] indicate the values of coef_compress and coef_res | ||||
|  | ||||
| @ -57,7 +57,7 @@ enum AudioObjectType { | ||||
|     AOT_AAC_MAIN,              ///< Y                       Main
 | ||||
|     AOT_AAC_LC,                ///< Y                       Low Complexity
 | ||||
|     AOT_AAC_SSR,               ///< N (code in SoC repo)    Scalable Sample Rate
 | ||||
|     AOT_AAC_LTP,               ///< N (code in SoC repo)    Long Term Prediction
 | ||||
|     AOT_AAC_LTP,               ///< Y                       Long Term Prediction
 | ||||
|     AOT_SBR,                   ///< Y                       Spectral Band Replication
 | ||||
|     AOT_AAC_SCALABLE,          ///< N                       Scalable
 | ||||
|     AOT_TWINVQ,                ///< N                       Twin Vector Quantizer
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user