Since c737f6edcef74a64f4d0ebcefa970bd31266d512, prescreening was run even when disabled because of a wrong check: "if (s->prescreen > 0)". s->prescreen is an array of two function pointers contained in the context; in this expression it decays to a pointer to its first element, and an ordered comparison of that pointer with 0 (i.e. NULL) is actually undefined behaviour, because relational comparisons are only defined between pointers into the same object, and NULL never points to any object. Nevertheless, both Clang and GCC compile this to code that treats s->prescreen > 0 as true, leading to segfaults, because the code then tries to access the -1th member of an array. This commit fixes the check (testing the integer user option s->pscrn instead) as well as another such check a few lines below. (Found via compiler warnings enabled by -pedantic: "ordered comparison between pointer and zero is an extension".)

Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
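
A minimal standalone sketch of the broken and the fixed check (hypothetical names, not the filter's actual context):

    #include <stdio.h>

    typedef struct {
        void (*prescreen[2])(void); /* array of two function pointers */
        int pscrn;                  /* integer option: 0 disables prescreening */
    } Ctx;

    static void run(void) { puts("prescreen"); }

    int main(void)
    {
        Ctx s = { { run, run }, 0 };

        /* Wrong: "s.prescreen" decays to a non-null pointer to its first
         * element, so "s.prescreen > 0" - an ordered comparison between a
         * pointer and the null pointer constant 0 - is invalid C. GCC and
         * Clang nevertheless accept it and evaluate it as always true, so
         * the prescreener would run even though pscrn == 0. */

        /* Right: test the integer option itself. */
        if (s.pscrn > 0)
            s.prescreen[s.pscrn > 1]();

        return 0;
    }

With pscrn == 0, the broken check also made &s->prescreener[s->pscrn - 1] index the coefficients array at -1, hence the segfault.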
/*
 * Copyright (C) 2010-2011 Kevin Stone
 * Copyright (C) 2016 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <float.h>

#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/float_dsp.h"
#include "libavutil/imgutils.h"
#include "libavutil/mem_internal.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
static const size_t NNEDI_WEIGHTS_SIZE = 13574928;
static const uint8_t NNEDI_XDIM[] = { 8, 16, 32, 48, 8, 16, 32 };
static const uint8_t NNEDI_YDIM[] = { 6, 6, 6, 6, 4, 4, 4 };
static const uint16_t NNEDI_NNS[] = { 16, 32, 64, 128, 256 };

typedef struct PrescreenerCoefficients {
    DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4];
    DECLARE_ALIGNED(32, float, bias_l0)[4];

    DECLARE_ALIGNED(32, float, kernel_l1)[4][4];
    DECLARE_ALIGNED(32, float, bias_l1)[4];

    DECLARE_ALIGNED(32, float, kernel_l2)[4][8];
    DECLARE_ALIGNED(32, float, bias_l2)[4];
} PrescreenerCoefficients;

typedef struct PredictorCoefficients {
    int xdim, ydim, nns, nsize;
    float *data;
    float *softmax_q1;
    float *elliott_q1;
    float *softmax_bias_q1;
    float *elliott_bias_q1;
    float *softmax_q2;
    float *elliott_q2;
    float *softmax_bias_q2;
    float *elliott_bias_q2;
} PredictorCoefficients;

typedef struct NNEDIContext {
    const AVClass *class;

    char *weights_file;

    AVFrame *prev;
    int eof;
    int64_t pts;

    AVFloatDSPContext *fdsp;
    int depth;
    int nb_planes;
    int nb_threads;
    int linesize[4];
    int planewidth[4];
    int planeheight[4];
    int field_n;

    PrescreenerCoefficients prescreener[4];
    PredictorCoefficients coeffs[2][5][7];

    float half;
    float in_scale;
    float out_scale;

    // Parameters
    int deint;
    int field;
    int process_plane;
    int nsize;
    int nnsparam;
    int qual;
    int etype;
    int pscrn;

    int input_size;
    uint8_t **prescreen_buf;
    float **input_buf;
    float **output_buf;

    void (*read)(const uint8_t *src, float *dst,
                 int src_stride, int dst_stride,
                 int width, int height, float scale);
    void (*write)(const float *src, uint8_t *dst,
                  int src_stride, int dst_stride,
                  int width, int height, int depth, float scale);
    void (*prescreen[2])(AVFilterContext *ctx,
                         const void *src, ptrdiff_t src_stride,
                         uint8_t *prescreen, int N,
                         const PrescreenerCoefficients *const coeffs);
} NNEDIContext;

#define OFFSET(x) offsetof(NNEDIContext, x)
#define RFLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

static const AVOption nnedi_options[] = {
    {"weights",  "set weights file", OFFSET(weights_file),  AV_OPT_TYPE_STRING, {.str="nnedi3_weights.bin"}, 0, 0, FLAGS },
    {"deint",         "set which frames to deinterlace", OFFSET(deint),         AV_OPT_TYPE_INT, {.i64=0}, 0, 1, RFLAGS, "deint" },
        {"all",        "deinterlace all frames",                       0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "deint" },
        {"interlaced", "only deinterlace frames marked as interlaced", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "deint" },
    {"field",  "set mode of operation", OFFSET(field),         AV_OPT_TYPE_INT, {.i64=-1}, -2, 3, RFLAGS, "field" },
        {"af", "use frame flags, both fields",  0, AV_OPT_TYPE_CONST, {.i64=-2}, 0, 0, RFLAGS, "field" },
        {"a",  "use frame flags, single field", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, RFLAGS, "field" },
        {"t",  "use top field only",            0, AV_OPT_TYPE_CONST, {.i64=0},  0, 0, RFLAGS, "field" },
        {"b",  "use bottom field only",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, RFLAGS, "field" },
        {"tf", "use both fields, top first",    0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, RFLAGS, "field" },
        {"bf", "use both fields, bottom first", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, RFLAGS, "field" },
    {"planes", "set which planes to process", OFFSET(process_plane), AV_OPT_TYPE_INT, {.i64=7}, 0, 15, RFLAGS },
    {"nsize",  "set size of local neighborhood around each pixel, used by the predictor neural network", OFFSET(nsize), AV_OPT_TYPE_INT, {.i64=6}, 0, 6, RFLAGS, "nsize" },
        {"s8x6",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "nsize" },
        {"s16x6",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "nsize" },
        {"s32x6",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "nsize" },
        {"s48x6",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "nsize" },
        {"s8x4",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, "nsize" },
        {"s16x4",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, RFLAGS, "nsize" },
        {"s32x4",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, RFLAGS, "nsize" },
    {"nns",    "set number of neurons in predictor neural network", OFFSET(nnsparam), AV_OPT_TYPE_INT, {.i64=1}, 0, 4, RFLAGS, "nns" },
        {"n16",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "nns" },
        {"n32",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "nns" },
        {"n64",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "nns" },
        {"n128",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "nns" },
        {"n256",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, "nns" },
    {"qual",  "set quality", OFFSET(qual), AV_OPT_TYPE_INT, {.i64=1}, 1, 2, RFLAGS, "qual" },
        {"fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "qual" },
        {"slow", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "qual" },
    {"etype", "set which set of weights to use in the predictor", OFFSET(etype), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, RFLAGS, "etype" },
        {"a",  "weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "etype" },
        {"abs","weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "etype" },
        {"s",  "weights trained to minimize squared error",  0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "etype" },
        {"mse","weights trained to minimize squared error",  0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "etype" },
    {"pscrn", "set prescreening", OFFSET(pscrn), AV_OPT_TYPE_INT, {.i64=2}, 0, 4, RFLAGS, "pscrn" },
        {"none",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "pscrn" },
        {"original",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "pscrn" },
        {"new",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "pscrn" },
        {"new2",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "pscrn" },
        {"new3",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, "pscrn" },
    { NULL }
};

AVFILTER_DEFINE_CLASS(nnedi);

static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;

    outlink->time_base.num = ctx->inputs[0]->time_base.num;
    outlink->time_base.den = ctx->inputs[0]->time_base.den * 2;
    outlink->w             = ctx->inputs[0]->w;
    outlink->h             = ctx->inputs[0]->h;

    outlink->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
                                   (AVRational){2, 1});

    return 0;
}

static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_GRAY8,
        AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
        AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
        AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
        AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
        AV_PIX_FMT_YUVJ411P,
        AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
        AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
        AV_PIX_FMT_YUV440P10,
        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,
        AV_PIX_FMT_YUV440P12,
        AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
        AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
        AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16,
        AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA422P12, AV_PIX_FMT_YUVA422P16,
        AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA420P16,
        AV_PIX_FMT_GBRAP10,   AV_PIX_FMT_GBRAP12,    AV_PIX_FMT_GBRAP16,
        AV_PIX_FMT_NONE
    };

    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
    if (!fmts_list)
        return AVERROR(ENOMEM);
    return ff_set_common_formats(ctx, fmts_list);
}

static float dot_dsp(const NNEDIContext *const s, const float *kernel, const float *input,
                     int n, float scale, float bias)
{
    float sum, y;

    sum = s->fdsp->scalarproduct_float(kernel, input, n);

    y = sum * scale + bias + 1e-20f;

    return y;
}
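
/* Elliott activation: x / (1 + |x|), a cheap, bounded sigmoid used by both
 * the prescreener and the predictor networks. */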
static float elliott(float x)
{
    return x / (1.0f + fabsf(x));
}

static void transform_elliott(float *input, int size)
{
    for (int i = 0; i < size; i++)
        input[i] = elliott(input[i]);
}
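
/* Original prescreener: a small 3-layer network over a 12x4 window around
 * each output pixel. A nonzero prescreen[] entry marks the pixel as easy:
 * it is then handled by the cubic interpolation() below instead of the
 * full predictor network. */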
static void process_old(AVFilterContext *ctx,
                        const void *src, ptrdiff_t src_stride,
                        uint8_t *prescreen, int N,
                        const PrescreenerCoefficients *const m_data)
{
    NNEDIContext *s = ctx->priv;
    const float *src_p = src;

    // Adjust source pointer to point to top-left of filter window.
    const float *window = src_p - 2 * src_stride - 5;

    for (int j = 0; j < N; j++) {
        LOCAL_ALIGNED_32(float, input, [48]);
        float state[12];

        for (int i = 0; i < 4; i++)
            memcpy(input + i * 12, window + i * src_stride + j, 12 * sizeof(float));

        // Layer 0.
        for (int n = 0; n < 4; n++)
            state[n] = dot_dsp(s, m_data->kernel_l0[n], input, 48, 1.0f, m_data->bias_l0[n]);
        transform_elliott(state + 1, 3);

        // Layer 1.
        for (int n = 0; n < 4; n++)
            state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);
        transform_elliott(state + 4, 3);

        // Layer 2.
        for (int n = 0; n < 4; n++)
            state[n + 8] = dot_dsp(s, m_data->kernel_l2[n], state, 8, 1.0f, m_data->bias_l2[n]);

        prescreen[j] = FFMAX(state[10], state[11]) <= FFMAX(state[8], state[9]) ? 255 : 0;
    }
}
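
/* Faster prescreener used by the "new"/"new2"/"new3" pscrn modes: a 2-layer
 * network over a 16x4 window that classifies four output pixels per
 * iteration; the three variants share this code with different
 * coefficient sets. */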
static void process_new(AVFilterContext *ctx,
                        const void *src, ptrdiff_t src_stride,
                        uint8_t *prescreen, int N,
                        const PrescreenerCoefficients *const m_data)
{
    NNEDIContext *s = ctx->priv;
    const float *src_p = src;

    // Adjust source pointer to point to top-left of filter window.
    const float *window = src_p - 2 * src_stride - 6;

    for (int j = 0; j < N; j += 4) {
        LOCAL_ALIGNED_32(float, input, [64]);
        float state[8];

        for (int i = 0; i < 4; i++)
            memcpy(input + i * 16, window + i * src_stride + j, 16 * sizeof(float));

        for (int n = 0; n < 4; n++)
            state[n] = dot_dsp(s, m_data->kernel_l0[n], input, 64, 1.0f, m_data->bias_l0[n]);
        transform_elliott(state, 4);

        for (int n = 0; n < 4; n++)
            state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);

        for (int n = 0; n < 4; n++)
            prescreen[j + n] = state[n + 4] > 0.f;
    }
}

static int filter_offset(int nn, const PredictorCoefficients *const model)
{
    return nn * model->nsize;
}

static const float *softmax_q1_filter(int nn,
                                      const PredictorCoefficients *const model)
{
    return model->softmax_q1 + filter_offset(nn, model);
}

static const float *elliott_q1_filter(int nn,
                                      const PredictorCoefficients *const model)
{
    return model->elliott_q1 + filter_offset(nn, model);
}

static const float *softmax_q2_filter(int nn,
                                      const PredictorCoefficients *const model)
{
    return model->softmax_q2 + filter_offset(nn, model);
}

static const float *elliott_q2_filter(int nn,
                                      const PredictorCoefficients *const model)
{
    return model->elliott_q2 + filter_offset(nn, model);
}
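
/* Copy the model's xdim*ydim input window into buf and compute its local
 * statistics: mstd[0] = mean, mstd[1] = standard deviation, mstd[2] = its
 * reciprocal (0 for flat areas), mstd[3] = accumulator for the result. */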
static void gather_input(const float *src, ptrdiff_t src_stride,
                         float *buf, float mstd[4],
                         const PredictorCoefficients *const model)
{
    const float scale = 1.f / model->nsize;
    float sum = 0.f;
    float sum_sq = 0.f;
    float tmp;

    for (int i = 0; i < model->ydim; i++) {
        memcpy(buf, src, model->xdim * sizeof(float));

        for (int j = 0; j < model->xdim; j++) {
            const float val = src[j];

            sum += val;
            sum_sq += val * val;
        }

        src += src_stride;
        buf += model->xdim;
    }

    mstd[0] = sum * scale;
    mstd[3] = 0.f;

    tmp = sum_sq * scale - mstd[0] * mstd[0];
    if (tmp < FLT_EPSILON) {
        mstd[1] = 0.0f;
        mstd[2] = 0.0f;
    } else {
        mstd[1] = sqrtf(tmp);
        mstd[2] = 1.0f / mstd[1];
    }
}

static float softmax_exp(float x)
{
    return expf(av_clipf(x, -80.f, 80.f));
}

static void transform_softmax_exp(float *input, int size)
{
    for (int i = 0; i < size; i++)
        input[i] = softmax_exp(input[i]);
}
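
/* Weighted average of the elliott outputs, weighted by the softmax
 * activations, then denormalized with the window's statistics:
 * mstd[3] += 5 * sum(w_i * elliott(e_i)) / sum(w_i) * stddev + mean. */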
static void wae5(const float *softmax, const float *el,
                 int n, float mstd[4])
{
    float vsum = 0.0f, wsum = 0.0f;

    for (int i = 0; i < n; i++) {
        vsum += softmax[i] * elliott(el[i]);
        wsum += softmax[i];
    }

    if (wsum > 1e-10f)
        mstd[3] += (5.0f * vsum) / wsum * mstd[1] + mstd[0];
    else
        mstd[3] += mstd[0];
}
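
/* Full predictor network: for every pixel not claimed by the prescreener,
 * run the quality-1 neuron pair (softmax + elliott) and, for qual=slow
 * (use_q2), also the quality-2 pair, averaging the two results. */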
static void predictor(AVFilterContext *ctx,
                      const void *src, ptrdiff_t src_stride, void *dst,
                      const uint8_t *prescreen, int N,
                      const PredictorCoefficients *const model, int use_q2)
{
    const NNEDIContext *const s = ctx->priv;
    const float *src_p = src;
    float *dst_p = dst;

    // Adjust source pointer to point to top-left of filter window.
    const float *window = src_p - (model->ydim / 2) * src_stride - (model->xdim / 2 - 1);
    const int filter_size = model->nsize;
    const int nns = model->nns;

    for (int i = 0; i < N; i++) {
        LOCAL_ALIGNED_32(float, input, [48 * 6]);
        float activation[256 * 2];
        float mstd[4];
        float scale;

        if (prescreen[i])
            continue;

        gather_input(window + i, src_stride, input, mstd, model);
        scale = mstd[2];

        for (int nn = 0; nn < nns; nn++)
            activation[nn] = dot_dsp(s, softmax_q1_filter(nn, model), input, filter_size, scale, model->softmax_bias_q1[nn]);

        for (int nn = 0; nn < nns; nn++)
            activation[nns + nn] = dot_dsp(s, elliott_q1_filter(nn, model), input, filter_size, scale, model->elliott_bias_q1[nn]);

        transform_softmax_exp(activation, nns);
        wae5(activation, activation + nns, nns, mstd);

        if (use_q2) {
            for (int nn = 0; nn < nns; nn++)
                activation[nn] = dot_dsp(s, softmax_q2_filter(nn, model), input, filter_size, scale, model->softmax_bias_q2[nn]);

            for (int nn = 0; nn < nns; nn++)
                activation[nns + nn] = dot_dsp(s, elliott_q2_filter(nn, model), input, filter_size, scale, model->elliott_bias_q2[nn]);

            transform_softmax_exp(activation, nns);
            wae5(activation, activation + nns, nns, mstd);
        }

        dst_p[i] = mstd[3] * (use_q2 ? 0.5f : 1.f);
    }
}
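
/* I/O helpers converting between the frame's native depth and the float
 * work buffers; on input, each line gets 32 mirrored samples of padding on
 * both sides so the filter windows never read out of bounds. */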
static void read_bytes(const uint8_t *src, float *dst,
                       int src_stride, int dst_stride,
                       int width, int height, float scale)
{
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < 32; x++)
            dst[-x - 1] = src[x];

        for (int x = 0; x < width; x++)
            dst[x] = src[x];

        for (int x = 0; x < 32; x++)
            dst[width + x] = src[width - x - 1];

        dst += dst_stride;
        src += src_stride;
    }
}

static void read_words(const uint8_t *srcp, float *dst,
                       int src_stride, int dst_stride,
                       int width, int height, float scale)
{
    const uint16_t *src = (const uint16_t *)srcp;

    src_stride /= 2;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < 32; x++)
            dst[-x - 1] = src[x] * scale;

        for (int x = 0; x < width; x++)
            dst[x] = src[x] * scale;

        for (int x = 0; x < 32; x++)
            dst[width + x] = src[width - x - 1] * scale;

        dst += dst_stride;
        src += src_stride;
    }
}

static void write_bytes(const float *src, uint8_t *dst,
                        int src_stride, int dst_stride,
                        int width, int height, int depth,
                        float scale)
{
    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++)
            dst[x] = av_clip_uint8(src[x]);

        dst += dst_stride;
        src += src_stride;
    }
}

static void write_words(const float *src, uint8_t *dstp,
                        int src_stride, int dst_stride,
                        int width, int height, int depth,
                        float scale)
{
    uint16_t *dst = (uint16_t *)dstp;

    dst_stride /= 2;

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++)
            dst[x] = av_clip_uintp2_c(src[x] * scale, depth);

        dst += dst_stride;
        src += src_stride;
    }
}
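
/* 4-tap cubic interpolation (-3/32, 19/32, 19/32, -3/32) across the four
 * neighbouring lines of the kept field, applied only to pixels that the
 * prescreener marked as easy. */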
static void interpolation(const void *src, ptrdiff_t src_stride,
                          void *dst, const uint8_t *prescreen, int n)
{
    const float *src_p = src;
    float *dst_p = dst;
    const float *window = src_p - 2 * src_stride;

    for (int i = 0; i < n; i++) {
        float accum = 0.0f;

        if (!prescreen[i])
            continue;

        accum += (-3.0f / 32.0f) * window[0 * src_stride + i];
        accum += (19.0f / 32.0f) * window[1 * src_stride + i];
        accum += (19.0f / 32.0f) * window[2 * src_stride + i];
        accum += (-3.0f / 32.0f) * window[3 * src_stride + i];

        dst_p[i] = accum;
    }
}
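
/* Slice worker: each job handles an even-aligned band of lines per plane.
 * Lines of the kept field are copied through unchanged; the missing
 * field's lines are read (with padding) into the float buffer,
 * prescreened, predicted and written back at the output depth. */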
static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    const NNEDIContext *const s = ctx->priv;
    AVFrame *out = arg;
    AVFrame *in = s->prev;
    const float in_scale = s->in_scale;
    const float out_scale = s->out_scale;
    const int depth = s->depth;
    const int interlaced = in->interlaced_frame;
    const int tff = s->field_n == (s->field < 0 ? interlaced ? in->top_field_first : 1 :
                                  (s->field & 1) ^ 1);

    for (int p = 0; p < s->nb_planes; p++) {
        const int height = s->planeheight[p];
        const int width = s->planewidth[p];
        const int slice_start = 2 * ((height / 2 * jobnr) / nb_jobs);
        const int slice_end = 2 * ((height / 2 * (jobnr+1)) / nb_jobs);
        const uint8_t *src_data = in->data[p];
        uint8_t *dst_data = out->data[p];
        uint8_t *dst = out->data[p] + slice_start * out->linesize[p];
        const int src_linesize = in->linesize[p];
        const int dst_linesize = out->linesize[p];
        uint8_t *prescreen_buf = s->prescreen_buf[jobnr];
        float *srcbuf = s->input_buf[jobnr];
        const int srcbuf_stride = width + 64;
        float *dstbuf = s->output_buf[jobnr];
        const int dstbuf_stride = width;
        const int slice_height = (slice_end - slice_start) / 2;
        const int last_slice = slice_end == height;
        const uint8_t *in_line;
        uint8_t *out_line;
        int y_out;

        if (!(s->process_plane & (1 << p))) {
            av_image_copy_plane(dst, out->linesize[p],
                                in->data[p] + slice_start * in->linesize[p],
                                in->linesize[p],
                                s->linesize[p], slice_end - slice_start);
            continue;
        }

        y_out    = slice_start + (tff ^ (slice_start & 1));
        in_line  = src_data + (y_out * src_linesize);
        out_line = dst_data + (y_out * dst_linesize);

        while (y_out < slice_end) {
            memcpy(out_line, in_line, s->linesize[p]);
            y_out += 2;
            in_line  += src_linesize * 2;
            out_line += dst_linesize * 2;
        }

        y_out = slice_start + ((!tff) ^ (slice_start & 1));

        s->read(src_data + FFMAX(y_out - 5, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        s->read(src_data + FFMAX(y_out - 3, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        s->read(src_data + FFMAX(y_out - 1, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        in_line  = src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize;
        out_line = dst_data + (y_out * dst_linesize);

        s->read(in_line, srcbuf + 32, src_linesize * 2, srcbuf_stride,
                width, slice_height - last_slice, in_scale);

        y_out += (slice_height - last_slice) * 2;

        s->read(src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        s->read(src_data + FFMIN(y_out + 3, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height + 1 - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        s->read(src_data + FFMIN(y_out + 5, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height + 2 - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        for (int y = 0; y < slice_end - slice_start; y += 2) {
            if (s->pscrn > 0)
                s->prescreen[s->pscrn > 1](ctx, srcbuf + (y / 2) * srcbuf_stride + 32,
                             srcbuf_stride, prescreen_buf, width,
                             &s->prescreener[s->pscrn - 1]);

            predictor(ctx,
                      srcbuf + (y / 2) * srcbuf_stride + 32,
                      srcbuf_stride,
                      dstbuf + (y / 2) * dstbuf_stride,
                      prescreen_buf, width,
                      &s->coeffs[s->etype][s->nnsparam][s->nsize], s->qual == 2);

            if (s->pscrn > 0)
                interpolation(srcbuf + (y / 2) * srcbuf_stride + 32,
                              srcbuf_stride,
                              dstbuf + (y / 2) * dstbuf_stride,
                              prescreen_buf, width);
        }

        s->write(dstbuf, out_line, dstbuf_stride, dst_linesize * 2,
                 width, slice_height, depth, out_scale);
    }

    return 0;
}

static int get_frame(AVFilterContext *ctx, int is_second)
{
    NNEDIContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *dst;

    dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!dst)
        return AVERROR(ENOMEM);
    av_frame_copy_props(dst, s->prev);
    dst->interlaced_frame = 0;
    dst->pts = s->pts;

    ctx->internal->execute(ctx, filter_slice, dst, NULL, FFMIN(s->planeheight[1] / 2, s->nb_threads));

    if (s->field == -2 || s->field > 1)
        s->field_n = !s->field_n;

    return ff_filter_frame(outlink, dst);
}

static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    NNEDIContext *s = ctx->priv;
    int ret;

    if (!s->prev) {
        s->prev = in;
        return 0;
    }

    if ((s->deint && !in->interlaced_frame) || ctx->is_disabled) {
        s->prev->pts *= 2;
        ret = ff_filter_frame(ctx->outputs[0], s->prev);
        s->prev = in;
        return ret;
    }

    s->pts = s->prev->pts * 2;
    ret = get_frame(ctx, 0);
    if (ret < 0 || (s->field > -2 && s->field < 2)) {
        av_frame_free(&s->prev);
        s->prev = in;
        return ret;
    }

    s->pts = s->prev->pts + in->pts;
    ret = get_frame(ctx, 1);
    av_frame_free(&s->prev);
    s->prev = in;
    return ret;
}

static int request_frame(AVFilterLink *link)
{
    AVFilterContext *ctx = link->src;
    NNEDIContext *s = ctx->priv;
    int ret;

    if (s->eof)
        return AVERROR_EOF;

    ret  = ff_request_frame(ctx->inputs[0]);

    if (ret == AVERROR_EOF && s->prev) {
        AVFrame *next = av_frame_clone(s->prev);

        if (!next)
            return AVERROR(ENOMEM);

        next->pts = s->prev->pts + av_rescale_q(1, av_inv_q(ctx->outputs[0]->frame_rate),
                                                ctx->outputs[0]->time_base);
        s->eof = 1;

        ret = filter_frame(ctx->inputs[0], next);
    } else if (ret < 0) {
        return ret;
    }

    return ret;
}

static void copy_weights(float *dst, int n, const float **data)
{
    memcpy(dst, *data, n * sizeof(float));
    *data += n;
}
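
/* Bump allocator: allocate_model() reserves a single block of
 * 4 * (filter_size + bias_size) floats and allocate() hands out the q1/q2
 * softmax/elliott filters and biases from it sequentially. */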
static float *allocate(float **ptr, int size)
{
    float *ret = *ptr;

    *ptr += size;

    return ret;
}

static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int nns)
{
    int filter_size = nns * xdim * ydim;
    int bias_size = nns;
    float *data;

    data = av_calloc(filter_size + bias_size, 4 * sizeof(float));
    if (!data)
        return AVERROR(ENOMEM);

    coeffs->data = data;
    coeffs->xdim = xdim;
    coeffs->ydim = ydim;
    coeffs->nsize = xdim * ydim;
    coeffs->nns  = nns;

    coeffs->softmax_q1 = allocate(&data, filter_size);
    coeffs->elliott_q1 = allocate(&data, filter_size);
    coeffs->softmax_bias_q1 = allocate(&data, bias_size);
    coeffs->elliott_bias_q1 = allocate(&data, bias_size);

    coeffs->softmax_q2 = allocate(&data, filter_size);
    coeffs->elliott_q2 = allocate(&data, filter_size);
    coeffs->softmax_bias_q2 = allocate(&data, bias_size);
    coeffs->elliott_bias_q2 = allocate(&data, bias_size);

    return 0;
}

static int read_weights(AVFilterContext *ctx, const float *bdata)
{
    NNEDIContext *s = ctx->priv;
    int ret;

    copy_weights(&s->prescreener[0].kernel_l0[0][0], 4 * 48, &bdata);
    copy_weights(s->prescreener[0].bias_l0, 4, &bdata);

    copy_weights(&s->prescreener[0].kernel_l1[0][0], 4 * 4, &bdata);
    copy_weights(s->prescreener[0].bias_l1, 4, &bdata);

    copy_weights(&s->prescreener[0].kernel_l2[0][0], 4 * 8, &bdata);
    copy_weights(s->prescreener[0].bias_l2, 4, &bdata);

    for (int i = 0; i < 3; i++) {
        PrescreenerCoefficients *data = &s->prescreener[i + 1];
        float kernel_l0_shuffled[4 * 64];
        float kernel_l1_shuffled[4 * 4];

        copy_weights(kernel_l0_shuffled, 4 * 64, &bdata);
        copy_weights(data->bias_l0, 4, &bdata);

        copy_weights(kernel_l1_shuffled, 4 * 4, &bdata);
        copy_weights(data->bias_l1, 4, &bdata);

        for (int n = 0; n < 4; n++) {
            for (int k = 0; k < 64; k++)
                data->kernel_l0[n][k] = kernel_l0_shuffled[(k / 8) * 32 + n * 8 + k % 8];
            for (int k = 0; k < 4; k++)
                data->kernel_l1[n][k] = kernel_l1_shuffled[k * 4 + n];
        }
    }

    for (int m = 0; m < 2; m++) {
        // Grouping by neuron count.
        for (int i = 0; i < 5; i++) {
            const int nns = NNEDI_NNS[i];

            // Grouping by window size.
            for (int j = 0; j < 7; j++) {
                PredictorCoefficients *model = &s->coeffs[m][i][j];
                const int xdim = NNEDI_XDIM[j];
                const int ydim = NNEDI_YDIM[j];
                const int filter_size = xdim * ydim;

                ret = allocate_model(model, xdim, ydim, nns);
                if (ret < 0)
                    return ret;

                // Quality 1 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
                copy_weights(model->softmax_q1, nns * filter_size, &bdata);
                copy_weights(model->elliott_q1, nns * filter_size, &bdata);

                // Quality 1 model bias. NNS[i] * 2 coefficients.
                copy_weights(model->softmax_bias_q1, nns, &bdata);
                copy_weights(model->elliott_bias_q1, nns, &bdata);

                // Quality 2 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
                copy_weights(model->softmax_q2, nns * filter_size, &bdata);
                copy_weights(model->elliott_q2, nns * filter_size, &bdata);

                // Quality 2 model bias. NNS[i] * 2 coefficients.
                copy_weights(model->softmax_bias_q2, nns, &bdata);
                copy_weights(model->elliott_bias_q2, nns, &bdata);
            }
        }
    }

    return 0;
}

static float mean(const float *input, int size)
{
    float sum = 0.f;

    for (int i = 0; i < size; i++)
        sum += input[i];

    return sum / size;
}

static void transform(float *input, int size, float mean, float half)
{
    for (int i = 0; i < size; i++)
        input[i] = (input[i] - mean) / half;
}

static void subtract_mean_old(PrescreenerCoefficients *coeffs, float half)
{
    for (int n = 0; n < 4; n++) {
        float m = mean(coeffs->kernel_l0[n], 48);

        transform(coeffs->kernel_l0[n], 48, m, half);
    }
}

static void subtract_mean_new(PrescreenerCoefficients *coeffs, float half)
{
    for (int n = 0; n < 4; n++) {
        float m = mean(coeffs->kernel_l0[n], 64);

        transform(coeffs->kernel_l0[n], 64, m, half);
    }
}
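
/* Normalize the predictor weights: remove each filter's own mean, the
 * pointwise mean across all softmax filters of a quality level, and the
 * mean softmax bias, so the dot products operate on centered data. */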
static void subtract_mean_predictor(PredictorCoefficients *model)
{
    const int filter_size = model->nsize;
    const int nns = model->nns;
    const float scale = 1.f / nns;

    double softmax_means[256]; // Average of individual softmax filters.
    double elliott_means[256]; // Average of individual elliott filters.
    double mean_filter[48 * 6] = { 0 }; // Pointwise average of all softmax filters; must start zeroed, it is accumulated into below.
    double mean_bias;

    // Quality 1.
    for (int nn = 0; nn < nns; nn++) {
        softmax_means[nn] = mean(model->softmax_q1 + nn * filter_size, filter_size);
        elliott_means[nn] = mean(model->elliott_q1 + nn * filter_size, filter_size);

        for (int k = 0; k < filter_size; k++)
            mean_filter[k] += model->softmax_q1[nn * filter_size + k] - softmax_means[nn];
    }

    for (int k = 0; k < filter_size; k++)
        mean_filter[k] *= scale;

    mean_bias = mean(model->softmax_bias_q1, nns);

    for (int nn = 0; nn < nns; nn++) {
        for (int k = 0; k < filter_size; k++) {
            model->softmax_q1[nn * filter_size + k] -= softmax_means[nn] + mean_filter[k];
            model->elliott_q1[nn * filter_size + k] -= elliott_means[nn];
        }
        model->softmax_bias_q1[nn] -= mean_bias;
    }

    // Quality 2.
    memset(mean_filter, 0, sizeof(mean_filter));

    for (int nn = 0; nn < nns; nn++) {
        softmax_means[nn] = mean(model->softmax_q2 + nn * filter_size, filter_size);
        elliott_means[nn] = mean(model->elliott_q2 + nn * filter_size, filter_size);

        for (int k = 0; k < filter_size; k++) {
            mean_filter[k] += model->softmax_q2[nn * filter_size + k] - softmax_means[nn];
        }
    }

    for (int k = 0; k < filter_size; k++)
        mean_filter[k] *= scale;

    mean_bias = mean(model->softmax_bias_q2, nns);

    for (int nn = 0; nn < nns; nn++) {
        for (int k = 0; k < filter_size; k++) {
            model->softmax_q2[nn * filter_size + k] -= softmax_means[nn] + mean_filter[k];
            model->elliott_q2[nn * filter_size + k] -= elliott_means[nn];
        }

        model->softmax_bias_q2[nn] -= mean_bias;
    }
}

static av_cold int init(AVFilterContext *ctx)
{
    NNEDIContext *s = ctx->priv;
    FILE *weights_file = NULL;
    int64_t weights_size;
    float *bdata;
    size_t bytes_read;
    int ret = 0;

    weights_file = av_fopen_utf8(s->weights_file, "rb");
    if (!weights_file) {
        av_log(ctx, AV_LOG_ERROR, "No weights file provided, aborting!\n");
        return AVERROR(EINVAL);
    }

    if (fseek(weights_file, 0, SEEK_END)) {
        av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the end of weights file.\n");
        fclose(weights_file);
        return AVERROR(EINVAL);
    }

    weights_size = ftell(weights_file);

    if (weights_size == -1) {
        fclose(weights_file);
        av_log(ctx, AV_LOG_ERROR, "Couldn't get size of weights file.\n");
        return AVERROR(EINVAL);
    } else if (weights_size != NNEDI_WEIGHTS_SIZE) {
        fclose(weights_file);
        av_log(ctx, AV_LOG_ERROR, "Unexpected weights file size.\n");
        return AVERROR(EINVAL);
    }

    if (fseek(weights_file, 0, SEEK_SET)) {
        fclose(weights_file);
        av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the start of weights file.\n");
        return AVERROR(EINVAL);
    }

    bdata = av_malloc(NNEDI_WEIGHTS_SIZE);
    if (!bdata) {
        fclose(weights_file);
        return AVERROR(ENOMEM);
    }

    bytes_read = fread(bdata, 1, NNEDI_WEIGHTS_SIZE, weights_file);
    if (bytes_read != NNEDI_WEIGHTS_SIZE) {
        fclose(weights_file);
        ret = AVERROR_INVALIDDATA;
        av_log(ctx, AV_LOG_ERROR, "Couldn't read weights file.\n");
        goto fail;
    }

    fclose(weights_file);

    s->fdsp = avpriv_float_dsp_alloc(0);
    if (!s->fdsp) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    ret = read_weights(ctx, bdata);
    if (ret < 0)
        goto fail;

fail:
    av_free(bdata);
    return ret;
}
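
/* Per-link setup: derive plane sizes, pick byte/word I/O by bit depth,
 * normalize samples to the 8-bit range (in_scale) and back (out_scale),
 * fold the means out of the pretrained coefficients, and allocate the
 * per-thread padded input, output and prescreen buffers. */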
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    NNEDIContext *s = ctx->priv;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
    int ret;

    s->depth = desc->comp[0].depth;
    s->nb_threads = ff_filter_get_nb_threads(ctx);
    s->nb_planes = av_pix_fmt_count_planes(inlink->format);
    if ((ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w)) < 0)
        return ret;

    s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
    s->planewidth[0] = s->planewidth[3] = inlink->w;
    s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
    s->planeheight[0] = s->planeheight[3] = inlink->h;

    s->half = ((1 << 8) - 1) / 2.f;
    s->out_scale = 1 << (s->depth - 8);
    s->in_scale = 1.f / s->out_scale;

    switch (s->depth) {
    case 8:
        s->read  = read_bytes;
        s->write = write_bytes;
        break;
    default:
        s->read  = read_words;
        s->write = write_words;
        break;
    }

    subtract_mean_old(&s->prescreener[0], s->half);
    subtract_mean_new(&s->prescreener[1], s->half);
    subtract_mean_new(&s->prescreener[2], s->half);
    subtract_mean_new(&s->prescreener[3], s->half);

    s->prescreen[0] = process_old;
    s->prescreen[1] = process_new;

    for (int i = 0; i < 2; i++) {
        for (int j = 0; j < 5; j++) {
            for (int k = 0; k < 7; k++)
                subtract_mean_predictor(&s->coeffs[i][j][k]);
        }
    }

    s->input_size = (s->planewidth[0] + 64) * (s->planeheight[0] + 6);
    s->input_buf = av_calloc(s->nb_threads, sizeof(*s->input_buf));
    if (!s->input_buf)
        return AVERROR(ENOMEM);

    for (int i = 0; i < s->nb_threads; i++) {
        s->input_buf[i] = av_calloc(s->input_size, sizeof(**s->input_buf));
        if (!s->input_buf[i])
            return AVERROR(ENOMEM);
    }

    s->output_buf = av_calloc(s->nb_threads, sizeof(*s->output_buf));
    if (!s->output_buf)
        return AVERROR(ENOMEM);

    for (int i = 0; i < s->nb_threads; i++) {
        s->output_buf[i] = av_calloc(s->input_size, sizeof(**s->output_buf));
        if (!s->output_buf[i])
            return AVERROR(ENOMEM);
    }

    s->prescreen_buf = av_calloc(s->nb_threads, sizeof(*s->prescreen_buf));
    if (!s->prescreen_buf)
        return AVERROR(ENOMEM);

    for (int i = 0; i < s->nb_threads; i++) {
        s->prescreen_buf[i] = av_calloc(s->planewidth[0], sizeof(**s->prescreen_buf));
        if (!s->prescreen_buf[i])
            return AVERROR(ENOMEM);
    }

    return 0;
}

static av_cold void uninit(AVFilterContext *ctx)
{
    NNEDIContext *s = ctx->priv;

    for (int i = 0; i < s->nb_threads && s->prescreen_buf; i++)
        av_freep(&s->prescreen_buf[i]);

    av_freep(&s->prescreen_buf);

    for (int i = 0; i < s->nb_threads && s->input_buf; i++)
        av_freep(&s->input_buf[i]);

    av_freep(&s->input_buf);

    for (int i = 0; i < s->nb_threads && s->output_buf; i++)
        av_freep(&s->output_buf[i]);

    av_freep(&s->output_buf);
    av_freep(&s->fdsp);

    for (int i = 0; i < 2; i++) {
        for (int j = 0; j < 5; j++) {
            for (int k = 0; k < 7; k++) {
                av_freep(&s->coeffs[i][j][k].data);
            }
        }
    }

    av_frame_free(&s->prev);
}

static const AVFilterPad inputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .filter_frame  = filter_frame,
        .config_props  = config_input,
    },
    { NULL }
};

static const AVFilterPad outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
        .request_frame = request_frame,
    },
    { NULL }
};

AVFilter ff_vf_nnedi = {
    .name          = "nnedi",
    .description   = NULL_IF_CONFIG_SMALL("Apply neural network edge directed interpolation intra-only deinterlacer."),
    .priv_size     = sizeof(NNEDIContext),
    .priv_class    = &nnedi_class,
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = inputs,
    .outputs       = outputs,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
    .process_command = ff_filter_process_command,
};