avcodec/fft: Add revtab32 for FFTs with more than 65536 samples
x86 optimizations are used only for the cases they support (<=65536 samples).

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
ae76b84221
commit
305344d89e
@ -110,6 +110,7 @@ struct FFTContext {
|
|||||||
void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
|
void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
|
||||||
enum fft_permutation_type fft_permutation;
|
enum fft_permutation_type fft_permutation;
|
||||||
enum mdct_permutation_type mdct_permutation;
|
enum mdct_permutation_type mdct_permutation;
|
||||||
|
uint32_t *revtab32;
|
||||||
};
|
};
|
||||||
|
|
||||||
#if CONFIG_HARDCODED_TABLES
|
#if CONFIG_HARDCODED_TABLES
|
||||||
|
@ -143,14 +143,23 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
|
|||||||
{
|
{
|
||||||
int i, j, n;
|
int i, j, n;
|
||||||
|
|
||||||
|
s->revtab = NULL;
|
||||||
|
s->revtab32 = NULL;
|
||||||
|
|
||||||
if (nbits < 2 || nbits > 17)
|
if (nbits < 2 || nbits > 17)
|
||||||
goto fail;
|
goto fail;
|
||||||
s->nbits = nbits;
|
s->nbits = nbits;
|
||||||
n = 1 << nbits;
|
n = 1 << nbits;
|
||||||
|
|
||||||
s->revtab = av_malloc(n * sizeof(uint16_t));
|
if (nbits <= 16) {
|
||||||
if (!s->revtab)
|
s->revtab = av_malloc(n * sizeof(uint16_t));
|
||||||
goto fail;
|
if (!s->revtab)
|
||||||
|
goto fail;
|
||||||
|
} else {
|
||||||
|
s->revtab32 = av_malloc(n * sizeof(uint32_t));
|
||||||
|
if (!s->revtab32)
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
|
s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
|
||||||
if (!s->tmp_buf)
|
if (!s->tmp_buf)
|
||||||
goto fail;
|
goto fail;
|
||||||
@ -192,16 +201,22 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
|
|||||||
fft_perm_avx(s);
|
fft_perm_avx(s);
|
||||||
} else {
|
} else {
|
||||||
for(i=0; i<n; i++) {
|
for(i=0; i<n; i++) {
|
||||||
|
int k;
|
||||||
j = i;
|
j = i;
|
||||||
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
|
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
|
||||||
j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
|
j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
|
||||||
s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
|
k = -split_radix_permutation(i, n, s->inverse) & (n-1);
|
||||||
|
if (s->revtab)
|
||||||
|
s->revtab[k] = j;
|
||||||
|
if (s->revtab32)
|
||||||
|
s->revtab32[k] = j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
fail:
|
fail:
|
||||||
av_freep(&s->revtab);
|
av_freep(&s->revtab);
|
||||||
|
av_freep(&s->revtab32);
|
||||||
av_freep(&s->tmp_buf);
|
av_freep(&s->tmp_buf);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -210,15 +225,21 @@ static void fft_permute_c(FFTContext *s, FFTComplex *z)
|
|||||||
{
|
{
|
||||||
int j, np;
|
int j, np;
|
||||||
const uint16_t *revtab = s->revtab;
|
const uint16_t *revtab = s->revtab;
|
||||||
|
const uint32_t *revtab32 = s->revtab32;
|
||||||
np = 1 << s->nbits;
|
np = 1 << s->nbits;
|
||||||
/* TODO: handle split-radix permute in a more optimal way, probably in-place */
|
/* TODO: handle split-radix permute in a more optimal way, probably in-place */
|
||||||
for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
|
if (revtab) {
|
||||||
|
for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
|
||||||
|
} else
|
||||||
|
for(j=0;j<np;j++) s->tmp_buf[revtab32[j]] = z[j];
|
||||||
|
|
||||||
memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
|
memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
|
||||||
}
|
}
|
||||||
|
|
||||||
av_cold void ff_fft_end(FFTContext *s)
|
av_cold void ff_fft_end(FFTContext *s)
|
||||||
{
|
{
|
||||||
av_freep(&s->revtab);
|
av_freep(&s->revtab);
|
||||||
|
av_freep(&s->revtab32);
|
||||||
av_freep(&s->tmp_buf);
|
av_freep(&s->tmp_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,6 +26,9 @@ av_cold void ff_fft_init_x86(FFTContext *s)
|
|||||||
{
|
{
|
||||||
int cpu_flags = av_get_cpu_flags();
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (s->nbits > 16)
|
||||||
|
return;
|
||||||
|
|
||||||
#if ARCH_X86_32
|
#if ARCH_X86_32
|
||||||
if (EXTERNAL_AMD3DNOW(cpu_flags)) {
|
if (EXTERNAL_AMD3DNOW(cpu_flags)) {
|
||||||
/* 3DNow! for K6-2/3 */
|
/* 3DNow! for K6-2/3 */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user