~4x faster than the C version. The shuffles in the 15pt dim1 are seriously expensive. Not happy with it, but I'm content. Can be easily converted to pure AVX by removing all vpermpd/vpermps instructions.
		
			
				
	
	
		
			129 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			129 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * This file is part of FFmpeg.
 | 
						|
 *
 | 
						|
 * FFmpeg is free software; you can redistribute it and/or modify
 | 
						|
 * it under the terms of the GNU General Public License as published by
 | 
						|
 * the Free Software Foundation; either version 2 of the License, or
 | 
						|
 * (at your option) any later version.
 | 
						|
 *
 | 
						|
 * FFmpeg is distributed in the hope that it will be useful,
 | 
						|
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
 * GNU General Public License for more details.
 | 
						|
 *
 | 
						|
 * You should have received a copy of the GNU General Public License along
 | 
						|
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 | 
						|
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 | 
						|
 */
 | 
						|
 | 
						|
#include "libavutil/mem_internal.h"
 | 
						|
#include "libavutil/tx.h"
 | 
						|
#include "libavutil/error.h"
 | 
						|
 | 
						|
#include "checkasm.h"
 | 
						|
 | 
						|
#include <stdlib.h>
 | 
						|
 | 
						|
/* Absolute tolerance handed to the *_near_abs_eps_array() output comparisons. */
#define EPS 0.0005
 | 
						|
 | 
						|
/* Identity scaling: hands the generated value through unchanged. */
#define SCALE_NOOP(x) (x)

/*
 * Fixed-point scaling: multiply x by 2^31, round to the nearest integer,
 * clip to the int32_t range and drop the 12 least significant bits.
 *
 * llrint() is used rather than lrintf() for two reasons:
 *  - the product is a double, so lrintf() would first narrow it to float;
 *  - lrintf() returns long, which cannot hold 2^31 where long is 32 bits wide,
 *    making the result unspecified before av_clip64() ever sees it.
 */
#define SCALE_INT20(x) (av_clip64(llrint((x) * 2147483648.0), INT32_MIN, INT32_MAX) >> 12)
 | 
						|
 | 
						|
/*
 * Fill the first LEN elements of BUF, viewed as an array of TYPE, with
 * pseudo-random two-component values.
 *
 * Each component is rnd() divided by UINT_MAX (computed in double) and then
 * passed through SCALE before being stored. The first component is drawn
 * before the second.
 *
 * BUF and LEN are parenthesized and LEN is evaluated exactly once. The
 * macro's locals carry an rc_..._ decoration so that an argument named
 * `buf` or `i` at the call site cannot be shadowed by them.
 */
#define randomize_complex(BUF, LEN, TYPE, SCALE)                        \
    do {                                                                \
        TYPE *rc_dst_ = (TYPE *)(BUF);                                  \
        const int rc_len_ = (LEN);                                      \
        for (int rc_i_ = 0; rc_i_ < rc_len_; rc_i_++) {                 \
            double rc_re_ = (double)rnd() / UINT_MAX;                   \
            double rc_im_ = (double)rnd() / UINT_MAX;                   \
            rc_dst_[rc_i_] = (TYPE){ SCALE(rc_re_), SCALE(rc_im_) };    \
        }                                                               \
    } while (0)
 | 
						|
 | 
						|
/*
 * Transform lengths exercised by the checks, in the order they are run.
 * The largest entry equals the element count the input buffer is
 * randomized with in checkasm_check_av_tx().
 */
static const int check_lens[] = {
        2,     4,     8,    16,    32,    64,   /* small powers of two */
      120,   960,  1024,  1920,                 /* mid-size, mixed radix included */
    16384,                                      /* largest length tested */
};
 | 
						|
 | 
						|
/*
 * Cached transform contexts, indexed by [transform type][direction][index
 * into check_lens]. CHECK_TEMPLATE uses an entry as the context for the
 * reference call when one is present, and replaces it with the context that
 * was just checked. Remaining entries are released by free_tx_refs().
 */
static AVTXContext *tx_refs[AV_TX_NB][2 /* Direction */][FF_ARRAY_ELEMS(check_lens)] = { 0 };
 | 
						|
/* Nonzero once free_tx_refs() has been registered with atexit(), so that it is registered only once. */
static int init = 0;
 | 
						|
 | 
						|
static void free_tx_refs(void)
 | 
						|
{
 | 
						|
    for (int i = 0; i < FF_ARRAY_ELEMS(tx_refs); i++)
 | 
						|
        for (int j = 0; j < FF_ARRAY_ELEMS(*tx_refs); j++)
 | 
						|
            for (int k = 0; k < FF_ARRAY_ELEMS(**tx_refs); k++)
 | 
						|
                av_tx_uninit(&tx_refs[i][j][k]);
 | 
						|
}
 | 
						|
 | 
						|
#define CHECK_TEMPLATE(PREFIX, TYPE, DIR, DATA_TYPE, SCALE_TYPE, LENGTHS, CHECK_EXPRESSION) \
 | 
						|
    do {                                                                          \
 | 
						|
        int err;                                                                  \
 | 
						|
        AVTXContext *tx;                                                          \
 | 
						|
        av_tx_fn fn;                                                              \
 | 
						|
        int num_checks = 0;                                                       \
 | 
						|
        int last_check = 0;                                                       \
 | 
						|
                                                                                  \
 | 
						|
        for (int i = 0; i < FF_ARRAY_ELEMS(LENGTHS); i++) {                       \
 | 
						|
            int len = LENGTHS[i];                                                 \
 | 
						|
            const SCALE_TYPE scale = 1.0 / len;                                   \
 | 
						|
                                                                                  \
 | 
						|
            if ((err = av_tx_init(&tx, &fn, TYPE, DIR, len, &scale, 0x0)) < 0) {  \
 | 
						|
                fprintf(stderr, "av_tx: %s\n", av_err2str(err));                  \
 | 
						|
                return;                                                           \
 | 
						|
            }                                                                     \
 | 
						|
                                                                                  \
 | 
						|
            if (check_func(fn, PREFIX "_%i", len)) {                              \
 | 
						|
                AVTXContext *tx_ref = tx_refs[TYPE][DIR][i];                      \
 | 
						|
                if (!tx_ref)                                                      \
 | 
						|
                    tx_ref = tx;                                                  \
 | 
						|
                num_checks++;                                                     \
 | 
						|
                last_check = len;                                                 \
 | 
						|
                call_ref(tx_ref, out_ref, in, sizeof(DATA_TYPE));                 \
 | 
						|
                call_new(tx,     out_new, in, sizeof(DATA_TYPE));                 \
 | 
						|
                if (CHECK_EXPRESSION) {                                           \
 | 
						|
                    fail();                                                       \
 | 
						|
                    av_tx_uninit(&tx);                                            \
 | 
						|
                    break;                                                        \
 | 
						|
                }                                                                 \
 | 
						|
                bench_new(tx, out_new, in, sizeof(DATA_TYPE));                    \
 | 
						|
                av_tx_uninit(&tx_refs[TYPE][DIR][i]);                             \
 | 
						|
                tx_refs[TYPE][DIR][i] = tx;                                       \
 | 
						|
            } else {                                                              \
 | 
						|
                av_tx_uninit(&tx);                                                \
 | 
						|
            }                                                                     \
 | 
						|
        }                                                                         \
 | 
						|
                                                                                  \
 | 
						|
        if (num_checks == 1)                                                      \
 | 
						|
            report(PREFIX "_%i", last_check);                                     \
 | 
						|
        else if (num_checks)                                                      \
 | 
						|
            report(PREFIX);                                                       \
 | 
						|
    } while (0)
 | 
						|
 | 
						|
void checkasm_check_av_tx(void)
 | 
						|
{
 | 
						|
    declare_func(void, AVTXContext *tx, void *out, void *in, ptrdiff_t stride);
 | 
						|
 | 
						|
    void *in      = av_malloc(16384*2*8);
 | 
						|
    void *out_ref = av_malloc(16384*2*8);
 | 
						|
    void *out_new = av_malloc(16384*2*8);
 | 
						|
 | 
						|
    randomize_complex(in, 16384, AVComplexFloat, SCALE_NOOP);
 | 
						|
    CHECK_TEMPLATE("float_fft", AV_TX_FLOAT_FFT, 0, AVComplexFloat, float, check_lens,
 | 
						|
                   !float_near_abs_eps_array(out_ref, out_new, EPS, len*2));
 | 
						|
 | 
						|
    CHECK_TEMPLATE("float_imdct", AV_TX_FLOAT_MDCT, 1, float, float, check_lens,
 | 
						|
                   !float_near_abs_eps_array(out_ref, out_new, EPS, len));
 | 
						|
 | 
						|
    randomize_complex(in, 16384, AVComplexDouble, SCALE_NOOP);
 | 
						|
    CHECK_TEMPLATE("double_fft", AV_TX_DOUBLE_FFT, 0, AVComplexDouble, double, check_lens,
 | 
						|
                   !double_near_abs_eps_array(out_ref, out_new, EPS, len*2));
 | 
						|
 | 
						|
    av_free(in);
 | 
						|
    av_free(out_ref);
 | 
						|
    av_free(out_new);
 | 
						|
 | 
						|
    if (!init) {
 | 
						|
        init = 1;
 | 
						|
        atexit(free_tx_refs);
 | 
						|
    }
 | 
						|
}
 |