diff --git a/configure b/configure
index 42b58627ae..779310974b 100755
--- a/configure
+++ b/configure
@@ -960,6 +960,7 @@ CONFIG_LIST="
     rtpdec
     runtime_cpudetect
     shared
+    sinewin
     small
     sram
     static
@@ -1238,8 +1239,8 @@ mdct_select="fft"
 rdft_select="fft"
 
 # decoders / encoders / hardware accelerators
-aac_decoder_select="mdct rdft"
-aac_encoder_select="mdct"
+aac_decoder_select="mdct rdft sinewin"
+aac_encoder_select="mdct sinewin"
 aac_latm_decoder_select="aac_decoder aac_latm_parser"
 ac3_decoder_select="mdct ac3_parser"
 ac3_encoder_select="mdct ac3dsp"
@@ -1247,12 +1248,12 @@ ac3_fixed_encoder_select="ac3dsp"
 alac_encoder_select="lpc"
 amrnb_decoder_select="lsp"
 amrwb_decoder_select="lsp"
-atrac1_decoder_select="mdct"
+atrac1_decoder_select="mdct sinewin"
 atrac3_decoder_select="mdct"
 binkaudio_dct_decoder_select="mdct rdft dct"
 binkaudio_rdft_decoder_select="mdct rdft"
 cavs_decoder_select="golomb"
-cook_decoder_select="mdct"
+cook_decoder_select="mdct sinewin"
 cscd_decoder_suggest="zlib"
 dca_decoder_select="mdct"
 dnxhd_encoder_select="aandct"
@@ -1315,8 +1316,8 @@ msmpeg4v2_decoder_select="h263_decoder"
 msmpeg4v2_encoder_select="h263_encoder"
 msmpeg4v3_decoder_select="h263_decoder"
 msmpeg4v3_encoder_select="h263_encoder"
-nellymoser_decoder_select="mdct"
-nellymoser_encoder_select="mdct"
+nellymoser_decoder_select="mdct sinewin"
+nellymoser_encoder_select="mdct sinewin"
 png_decoder_select="zlib"
 png_encoder_select="zlib"
 qcelp_decoder_select="lsp"
@@ -1343,7 +1344,7 @@ tiff_decoder_suggest="zlib"
 tiff_encoder_suggest="zlib"
 truehd_decoder_select="mlp_decoder"
 tscc_decoder_select="zlib"
-twinvq_decoder_select="mdct lsp"
+twinvq_decoder_select="mdct lsp sinewin"
 vc1_decoder_select="h263_decoder"
 vc1_crystalhd_decoder_select="crystalhd"
 vc1_dxva2_hwaccel_deps="dxva2api_h DXVA_PictureParameters_wDecodedPictureIndex"
@@ -1356,12 +1357,12 @@ vp6_decoder_select="huffman"
 vp6a_decoder_select="vp6_decoder"
 vp6f_decoder_select="vp6_decoder"
 vp8_decoder_select="h264pred"
-wmapro_decoder_select="mdct"
-wmav1_decoder_select="mdct"
-wmav1_encoder_select="mdct"
-wmav2_decoder_select="mdct"
-wmav2_encoder_select="mdct"
-wmavoice_decoder_select="lsp rdft dct mdct"
+wmapro_decoder_select="mdct sinewin"
+wmav1_decoder_select="mdct sinewin"
+wmav1_encoder_select="mdct sinewin"
+wmav2_decoder_select="mdct sinewin"
+wmav2_encoder_select="mdct sinewin"
+wmavoice_decoder_select="lsp rdft dct mdct sinewin"
 wmv1_decoder_select="h263_decoder"
 wmv1_encoder_select="h263_encoder"
 wmv2_decoder_select="h263_decoder"
diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
index f0f3b65084..b9b738210e 100644
--- a/doc/ffmpeg.texi
+++ b/doc/ffmpeg.texi
@@ -622,11 +622,43 @@ Synchronize read on input.
 @section Advanced options
 
 @table @option
-@item -map @var{input_stream_id}[:@var{sync_stream_id}]
-Set stream mapping from input streams to output streams.
-Just enumerate the input streams in the order you want them in the output.
-@var{sync_stream_id} if specified sets the input stream to sync
-against.
+@item -map @var{input_file_id}.@var{input_stream_id}[:@var{sync_file_id}.@var{sync_stream_id}]
+
+Designate an input stream as a source for the output file. Each input
+stream is identified by the input file index @var{input_file_id} and
+the input stream index @var{input_stream_id} within the input
+file. Both indexes start at 0. If specified,
+@var{sync_file_id}.@var{sync_stream_id} sets which input stream
+is used as a presentation sync reference.
+
+The @code{-map} options must be specified just after the output file.
+If any @code{-map} options are used, the number of @code{-map} options
+on the command line must match the number of streams in the output
+file. The first @code{-map} option on the command line specifies the
+source for output stream 0, the second @code{-map} option specifies
+the source for output stream 1, etc.
+
+For example, if you have two audio streams in the first input file,
+these streams are identified by "0.0" and "0.1". You can use
+@code{-map} to select which stream to place in an output file. For
+example:
+@example
+ffmpeg -i INPUT out.wav -map 0.1
+@end example
+will map the input stream in @file{INPUT} identified by "0.1" to
+the (single) output stream in @file{out.wav}.
+
+For example, to select the stream with index 2 from input file
+@file{a.mov} (specified by the identifier "0.2"), and stream with
+index 6 from input @file{b.mov} (specified by the identifier "1.6"),
+and copy them to the output file @file{out.mov}:
+@example
+ffmpeg -i a.mov -i b.mov -vcodec copy -acodec copy out.mov -map 0.2 -map 1.6
+@end example
+
+To add more streams to the output file, you can use the
+@code{-newaudio}, @code{-newvideo}, @code{-newsubtitle} options.
+
 @item -map_meta_data @var{outfile}[,@var{metadata}]:@var{infile}[,@var{metadata}]
 Deprecated, use @var{-map_metadata} instead.
 
diff --git a/ffmpeg.c b/ffmpeg.c
index a07d3c7793..f9e97a2ce4 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -4214,7 +4214,7 @@ static const OptionDef options[] = {
     { "f", HAS_ARG, {(void*)opt_format}, "force format", "fmt" },
     { "i", HAS_ARG, {(void*)opt_input_file}, "input file name", "filename" },
     { "y", OPT_BOOL, {(void*)&file_overwrite}, "overwrite output files" },
-    { "map", HAS_ARG | OPT_EXPERT, {(void*)opt_map}, "set input stream mapping", "file:stream[:syncfile:syncstream]" },
+    { "map", HAS_ARG | OPT_EXPERT, {(void*)opt_map}, "set input stream mapping", "file.stream[:syncfile.syncstream]" },
     { "map_meta_data", HAS_ARG | OPT_EXPERT, {(void*)opt_map_meta_data}, "DEPRECATED set meta data information of outfile from infile",
       "outfile[,metadata]:infile[,metadata]" },
     { "map_metadata", HAS_ARG | OPT_EXPERT, {(void*)opt_map_metadata}, "set metadata information of outfile from infile",
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index eefe60c772..8343d92bc6 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -43,6 +43,7 @@ OBJS-$(CONFIG_LSP)                     += lsp.o
 OBJS-$(CONFIG_MDCT)                    += mdct.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
+OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
 OBJS-$(CONFIG_VAAPI)                   += vaapi.o
 OBJS-$(CONFIG_VDPAU)                   += vdpau.o
 
@@ -50,14 +51,14 @@ OBJS-$(CONFIG_VDPAU)                   += vdpau.o
 OBJS-$(CONFIG_A64MULTI_ENCODER)        += a64multienc.o elbg.o
 OBJS-$(CONFIG_A64MULTI5_ENCODER)       += a64multienc.o elbg.o
 OBJS-$(CONFIG_AAC_DECODER)             += aacdec.o aactab.o aacsbr.o aacps.o \
-                                          aacadtsdec.o mpeg4audio.o
+                                          aacadtsdec.o mpeg4audio.o kbdwin.o
 OBJS-$(CONFIG_AAC_ENCODER)             += aacenc.o aaccoder.o    \
                                           aacpsy.o aactab.o      \
                                           psymodel.o iirfilter.o \
-                                          mpeg4audio.o
+                                          mpeg4audio.o kbdwin.o
 OBJS-$(CONFIG_AASC_DECODER)            += aasc.o msrledec.o
 OBJS-$(CONFIG_AC3_DECODER)             += ac3dec.o ac3dec_data.o ac3.o
-OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3tab.o ac3.o
+OBJS-$(CONFIG_AC3_ENCODER)             += ac3enc_float.o ac3tab.o ac3.o kbdwin.o
 OBJS-$(CONFIG_AC3_FIXED_ENCODER)       += ac3enc_fixed.o ac3tab.o ac3.o
 OBJS-$(CONFIG_ALAC_DECODER)            += alac.o
 OBJS-$(CONFIG_ALAC_ENCODER)            += alacenc.o
@@ -694,7 +695,7 @@ $(SUBDIR)%_tablegen$(HOSTEXESUF): $(SUBDIR)%_tablegen.c $(SUBDIR)%_tablegen.h $(
 	$(HOSTCC) $(HOSTCFLAGS) $(HOSTLDFLAGS) -o $@ $(filter %.c,$^) $(HOSTLIBS)
 
 GEN_HEADERS = cbrt_tables.h aacps_tables.h aac_tables.h dv_tables.h     \
-              mdct_tables.h mpegaudio_tables.h motionpixels_tables.h    \
+              sinewin_tables.h mpegaudio_tables.h motionpixels_tables.h \
               pcm_tables.h qdm2_tables.h
 GEN_HEADERS := $(addprefix $(SUBDIR), $(GEN_HEADERS))
 
@@ -706,7 +707,7 @@ $(SUBDIR)aacdec.o: $(SUBDIR)cbrt_tables.h
 $(SUBDIR)aacps.o: $(SUBDIR)aacps_tables.h
 $(SUBDIR)aactab.o: $(SUBDIR)aac_tables.h
 $(SUBDIR)dv.o: $(SUBDIR)dv_tables.h
-$(SUBDIR)mdct.o: $(SUBDIR)mdct_tables.h
+$(SUBDIR)sinewin.o: $(SUBDIR)sinewin_tables.h
 $(SUBDIR)mpegaudiodec.o: $(SUBDIR)mpegaudio_tables.h
 $(SUBDIR)mpegaudiodec_float.o: $(SUBDIR)mpegaudio_tables.h
 $(SUBDIR)motionpixels.o: $(SUBDIR)motionpixels_tables.h
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 6317e429e2..1399eda126 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -87,6 +87,8 @@
 #include "fft.h"
 #include "fmtconvert.h"
 #include "lpc.h"
+#include "kbdwin.h"
+#include "sinewin.h"
 
 #include "aac.h"
 #include "aactab.h"
@@ -1750,7 +1752,7 @@ static void windowing_and_mdct_ltp(AACContext *ac, float *out,
         ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
         memset(in + 1024 + 576, 0, 448 * sizeof(float));
     }
-    ff_mdct_calc(&ac->mdct_ltp, out, in);
+    ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
 }
 
 /**
@@ -1839,9 +1841,9 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
     // imdct
     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
         for (i = 0; i < 1024; i += 128)
-            ff_imdct_half(&ac->mdct_small, buf + i, in + i);
+            ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
     } else
-        ff_imdct_half(&ac->mdct, buf, in);
+        ac->mdct.imdct_half(&ac->mdct, buf, in);
 
     /* window overlapping
      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 0ca390e72b..8fc8f70726 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -34,6 +34,8 @@
 #include "put_bits.h"
 #include "dsputil.h"
 #include "mpeg4audio.h"
+#include "kbdwin.h"
+#include "sinewin.h"
 
 #include "aac.h"
 #include "aactab.h"
@@ -250,7 +252,7 @@ static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s,
             for (i = 0; i < 1024; i++)
                 sce->saved[i] = audio[i * chans];
         }
-        ff_mdct_calc(&s->mdct1024, sce->coeffs, output);
+        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
     } else {
         for (k = 0; k < 1024; k += 128) {
             for (i = 448 + k; i < 448 + k + 256; i++)
@@ -259,7 +261,7 @@ static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s,
                                          : audio[(i-1024)*chans];
             s->dsp.vector_fmul        (output,     output, k ?  swindow : pwindow, 128);
             s->dsp.vector_fmul_reverse(output+128, output+128, swindow, 128);
-            ff_mdct_calc(&s->mdct128, sce->coeffs + k, output);
+            s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + k, output);
         }
         for (i = 0; i < 1024; i++)
             sce->saved[i] = audio[i * chans];
diff --git a/libavcodec/aacsbr.c b/libavcodec/aacsbr.c
index 237d51a391..117aa98875 100644
--- a/libavcodec/aacsbr.c
+++ b/libavcodec/aacsbr.c
@@ -1155,7 +1155,7 @@ static void sbr_qmf_analysis(DSPContext *dsp, FFTContext *mdct, const float *in,
         }
         z[64+63] = z[32];
 
-        ff_imdct_half(mdct, z, z+64);
+        mdct->imdct_half(mdct, z, z+64);
         for (k = 0; k < 32; k++) {
             W[1][i][k][0] = -z[63-k];
             W[1][i][k][1] = z[k];
@@ -1190,7 +1190,7 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
                 X[0][i][   n] = -X[0][i][n];
                 X[0][i][32+n] =  X[1][i][31-n];
             }
-            ff_imdct_half(mdct, mdct_buf[0], X[0][i]);
+            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
             for (n = 0; n < 32; n++) {
                 v[     n] =  mdct_buf[0][63 - 2*n];
                 v[63 - n] = -mdct_buf[0][62 - 2*n];
@@ -1199,8 +1199,8 @@ static void sbr_qmf_synthesis(DSPContext *dsp, FFTContext *mdct,
             for (n = 1; n < 64; n+=2) {
                 X[1][i][n] = -X[1][i][n];
             }
-            ff_imdct_half(mdct, mdct_buf[0], X[0][i]);
-            ff_imdct_half(mdct, mdct_buf[1], X[1][i]);
+            mdct->imdct_half(mdct, mdct_buf[0], X[0][i]);
+            mdct->imdct_half(mdct, mdct_buf[1], X[1][i]);
             for (n = 0; n < 64; n++) {
                 v[      n] = -mdct_buf[0][63 -   n] + mdct_buf[1][  n    ];
                 v[127 - n] =  mdct_buf[0][63 -   n] + mdct_buf[1][  n    ];
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 4df0cf8a03..094b2615ff 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -35,6 +35,7 @@
 #include "ac3_parser.h"
 #include "ac3dec.h"
 #include "ac3dec_data.h"
+#include "kbdwin.h"
 
 /** Large enough for maximum possible frame size when the specification limit is ignored */
 #define AC3_FRAME_BUFFER_SIZE 32768
@@ -621,13 +622,13 @@ static inline void do_imdct(AC3DecodeContext *s, int channels)
             float *x = s->tmp_output+128;
             for(i=0; i<128; i++)
                 x[i] = s->transform_coeffs[ch][2*i];
-            ff_imdct_half(&s->imdct_256, s->tmp_output, x);
+            s->imdct_256.imdct_half(&s->imdct_256, s->tmp_output, x);
             s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
             for(i=0; i<128; i++)
                 x[i] = s->transform_coeffs[ch][2*i+1];
-            ff_imdct_half(&s->imdct_256, s->delay[ch-1], x);
+            s->imdct_256.imdct_half(&s->imdct_256, s->delay[ch-1], x);
         } else {
-            ff_imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
+            s->imdct_512.imdct_half(&s->imdct_512, s->tmp_output, s->transform_coeffs[ch]);
             s->dsp.vector_fmul_window(s->output[ch-1], s->delay[ch-1], s->tmp_output, s->window, 128);
             memcpy(s->delay[ch-1], s->tmp_output+128, 128*sizeof(float));
         }
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index 4b13e4c723..e0783f9aa3 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -28,6 +28,7 @@
 
 #define CONFIG_AC3ENC_FLOAT 1
 #include "ac3enc.c"
+#include "kbdwin.h"
 
 
 /**
@@ -74,7 +75,7 @@ static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
  */
 static void mdct512(AC3MDCTContext *mdct, float *out, float *in)
 {
-    ff_mdct_calc(&mdct->fft, out, in);
+    mdct->fft.mdct_calc(&mdct->fft, out, in);
 }
 
 
diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c
index dff0689566..3e0e41ec3b 100644
--- a/libavcodec/arm/fft_init_arm.c
+++ b/libavcodec/arm/fft_init_arm.c
@@ -19,6 +19,7 @@
  */
 
 #include "libavcodec/fft.h"
+#include "libavcodec/rdft.h"
 #include "libavcodec/synth_filter.h"
 
 void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
diff --git a/libavcodec/atrac1.c b/libavcodec/atrac1.c
index be78445b8f..8d8267556a 100644
--- a/libavcodec/atrac1.c
+++ b/libavcodec/atrac1.c
@@ -36,6 +36,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "fft.h"
+#include "sinewin.h"
 
 #include "atrac.h"
 #include "atrac1data.h"
@@ -99,7 +100,7 @@ static void at1_imdct(AT1Ctx *q, float *spec, float *out, int nbits,
         for (i = 0; i < transf_size / 2; i++)
             FFSWAP(float, spec[i], spec[transf_size - 1 - i]);
     }
-    ff_imdct_half(mdct_context, out, spec);
+    mdct_context->imdct_half(mdct_context, out, spec);
 }
 
 
diff --git a/libavcodec/atrac3.c b/libavcodec/atrac3.c
index 3d1f990164..3bf514c8b3 100644
--- a/libavcodec/atrac3.c
+++ b/libavcodec/atrac3.c
@@ -146,7 +146,7 @@ static void IMLT(ATRAC3Context *q, float *pInput, float *pOutput, int odd_band)
         /**
         * Reverse the odd bands before IMDCT, this is an effect of the QMF transform
         * or it gives better compression to do it this way.
-        * FIXME: It should be possible to handle this in ff_imdct_calc
+        * FIXME: It should be possible to handle this in imdct_calc
         * for that to happen a modification of the prerotation step of
         * all SIMD code and C code is needed.
         * Or fix the functions before so they generate a pre reversed spectrum.
@@ -156,7 +156,7 @@ static void IMLT(ATRAC3Context *q, float *pInput, float *pOutput, int odd_band)
             FFSWAP(float, pInput[i], pInput[255-i]);
     }
 
-    ff_imdct_calc(&q->mdct_ctx,pOutput,pInput);
+    q->mdct_ctx.imdct_calc(&q->mdct_ctx,pOutput,pInput);
 
     /* Perform windowing on the output. */
     dsp.vector_fmul(pOutput, pOutput, mdct_window, 512);
diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 7d5d08390f..9e0ddaa627 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -19,6 +19,8 @@
 #include "libavutil/mem.h"
 #include "avfft.h"
 #include "fft.h"
+#include "rdft.h"
+#include "dct.h"
 
 /* FFT */
 
@@ -101,7 +103,7 @@ RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
 
 void av_rdft_calc(RDFTContext *s, FFTSample *data)
 {
-    ff_rdft_calc(s, data);
+    s->rdft_calc(s, data);
 }
 
 void av_rdft_end(RDFTContext *s)
@@ -128,7 +130,7 @@ DCTContext *av_dct_init(int nbits, enum DCTTransformType inverse)
 
 void av_dct_calc(DCTContext *s, FFTSample *data)
 {
-    ff_dct_calc(s, data);
+    s->dct_calc(s, data);
 }
 
 void av_dct_end(DCTContext *s)
diff --git a/libavcodec/binkaudio.c b/libavcodec/binkaudio.c
index a00d6578e2..d879efc2e0 100644
--- a/libavcodec/binkaudio.c
+++ b/libavcodec/binkaudio.c
@@ -32,7 +32,8 @@
 #define ALT_BITSTREAM_READER_LE
 #include "get_bits.h"
 #include "dsputil.h"
-#include "fft.h"
+#include "dct.h"
+#include "rdft.h"
 #include "fmtconvert.h"
 #include "libavutil/intfloat_readwrite.h"
 
@@ -223,11 +224,11 @@ static void decode_block(BinkAudioContext *s, short *out, int use_dct)
 
         if (CONFIG_BINKAUDIO_DCT_DECODER && use_dct) {
             coeffs[0] /= 0.5;
-            ff_dct_calc (&s->trans.dct,  coeffs);
+            s->trans.dct.dct_calc(&s->trans.dct,  coeffs);
             s->dsp.vector_fmul_scalar(coeffs, coeffs, s->frame_len / 2, s->frame_len);
         }
         else if (CONFIG_BINKAUDIO_RDFT_DECODER)
-            ff_rdft_calc(&s->trans.rdft, coeffs);
+            s->trans.rdft.rdft_calc(&s->trans.rdft, coeffs);
     }
 
     s->fmt_conv.float_to_int16_interleave(out, (const float **)s->coeffs_ptr,
diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index da31e9f520..3f7776bed0 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -54,6 +54,7 @@
 #include "bytestream.h"
 #include "fft.h"
 #include "libavutil/audioconvert.h"
+#include "sinewin.h"
 
 #include "cookdata.h"
 
@@ -753,7 +754,7 @@ static void imlt_gain(COOKContext *q, float *inbuffer,
     int i;
 
     /* Inverse modified discrete cosine transform */
-    ff_imdct_calc(&q->mdct_ctx, q->mono_mdct_output, inbuffer);
+    q->mdct_ctx.imdct_calc(&q->mdct_ctx, q->mono_mdct_output, inbuffer);
 
     q->imlt_window (q, buffer1, gains_ptr, previous_buffer);
 
diff --git a/libavcodec/costablegen.c b/libavcodec/costablegen.c
index bfcd6356a0..545fe7f998 100644
--- a/libavcodec/costablegen.c
+++ b/libavcodec/costablegen.c
@@ -37,7 +37,7 @@ int main(int argc, char *argv[])
     double (*func)(double) = do_sin ? sin : cos;
 
     printf("/* This file was generated by libavcodec/costablegen */\n");
-    printf("#include \"libavcodec/fft.h\"\n");
+    printf("#include \"libavcodec/%s\"\n", do_sin ? "rdft.h" : "fft.h");
     for (i = 4; i <= BITS; i++) {
         int m = 1 << i;
         double freq = 2*M_PI/m;
diff --git a/libavcodec/dct.c b/libavcodec/dct.c
index dab94c30a0..83cf1b4896 100644
--- a/libavcodec/dct.c
+++ b/libavcodec/dct.c
@@ -29,8 +29,7 @@
 
 #include <math.h>
 #include "libavutil/mathematics.h"
-#include "fft.h"
-#include "x86/fft.h"
+#include "dct.h"
 
 #define DCT32_FLOAT
 #include "dct32.c"
@@ -59,7 +58,7 @@ static void ff_dst_calc_I_c(DCTContext *ctx, FFTSample *data)
     }
 
     data[n/2] *= 2;
-    ff_rdft_calc(&ctx->rdft, data);
+    ctx->rdft.rdft_calc(&ctx->rdft, data);
 
     data[0] *= 0.5f;
 
@@ -93,7 +92,7 @@ static void ff_dct_calc_I_c(DCTContext *ctx, FFTSample *data)
         data[n - i] = tmp1 + s;
     }
 
-    ff_rdft_calc(&ctx->rdft, data);
+    ctx->rdft.rdft_calc(&ctx->rdft, data);
     data[n] = data[1];
     data[1] = next;
 
@@ -121,7 +120,7 @@ static void ff_dct_calc_III_c(DCTContext *ctx, FFTSample *data)
 
     data[1] = 2 * next;
 
-    ff_rdft_calc(&ctx->rdft, data);
+    ctx->rdft.rdft_calc(&ctx->rdft, data);
 
     for (i = 0; i < n / 2; i++) {
         float tmp1 = data[i        ] * inv_n;
@@ -152,7 +151,7 @@ static void ff_dct_calc_II_c(DCTContext *ctx, FFTSample *data)
         data[n-i-1] = tmp1 - s;
     }
 
-    ff_rdft_calc(&ctx->rdft, data);
+    ctx->rdft.rdft_calc(&ctx->rdft, data);
 
     next = data[1] * 0.5;
     data[1] *= -1;
@@ -176,11 +175,6 @@ static void dct32_func(DCTContext *ctx, FFTSample *data)
     ctx->dct32(data, data);
 }
 
-void ff_dct_calc(DCTContext *s, FFTSample *data)
-{
-    s->dct_calc(s, data);
-}
-
 av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
 {
     int n = 1 << nbits;
diff --git a/libavcodec/dct.h b/libavcodec/dct.h
new file mode 100644
index 0000000000..44cf04d41d
--- /dev/null
+++ b/libavcodec/dct.h
@@ -0,0 +1,50 @@
+/*
+ * (I)DCT Transforms
+ * Copyright (c) 2009 Peter Ross <pross@xvid.org>
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ * Copyright (c) 2010 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef AVCODEC_DCT_H
+#define AVCODEC_DCT_H
+
+#include "rdft.h"
+
+struct DCTContext {
+    int nbits;
+    int inverse;
+    RDFTContext rdft;
+    const float *costab;
+    FFTSample *csc2;
+    void (*dct_calc)(struct DCTContext *s, FFTSample *data);
+    void (*dct32)(FFTSample *out, const FFTSample *in);
+};
+
+/**
+ * Set up DCT.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+int  ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type);
+void ff_dct_end (DCTContext *s);
+
+#endif
diff --git a/libavcodec/fft-test.c b/libavcodec/fft-test.c
index 85282f531a..9ad8793c2b 100644
--- a/libavcodec/fft-test.c
+++ b/libavcodec/fft-test.c
@@ -27,6 +27,8 @@
 #include "libavutil/lfg.h"
 #include "libavutil/log.h"
 #include "fft.h"
+#include "dct.h"
+#include "rdft.h"
 #include <math.h>
 #include <unistd.h>
 #include <sys/time.h>
@@ -327,20 +329,20 @@ int main(int argc, char **argv)
     case TRANSFORM_MDCT:
         if (do_inverse) {
             imdct_ref((float *)tab_ref, (float *)tab1, fft_nbits);
-            ff_imdct_calc(m, tab2, (float *)tab1);
+            m->imdct_calc(m, tab2, (float *)tab1);
             err = check_diff((float *)tab_ref, tab2, fft_size, scale);
         } else {
             mdct_ref((float *)tab_ref, (float *)tab1, fft_nbits);
 
-            ff_mdct_calc(m, tab2, (float *)tab1);
+            m->mdct_calc(m, tab2, (float *)tab1);
 
             err = check_diff((float *)tab_ref, tab2, fft_size / 2, scale);
         }
         break;
     case TRANSFORM_FFT:
         memcpy(tab, tab1, fft_size * sizeof(FFTComplex));
-        ff_fft_permute(s, tab);
-        ff_fft_calc(s, tab);
+        s->fft_permute(s, tab);
+        s->fft_calc(s, tab);
 
         fft_ref(tab_ref, tab1, fft_nbits);
         err = check_diff((float *)tab_ref, (float *)tab, fft_size * 2, 1.0);
@@ -357,7 +359,7 @@ int main(int argc, char **argv)
             memcpy(tab2, tab1, fft_size * sizeof(FFTSample));
             tab2[1] = tab1[fft_size_2].re;
 
-            ff_rdft_calc(r, tab2);
+            r->rdft_calc(r, tab2);
             fft_ref(tab_ref, tab1, fft_nbits);
             for (i = 0; i < fft_size; i++) {
                 tab[i].re = tab2[i];
@@ -369,7 +371,7 @@ int main(int argc, char **argv)
                 tab2[i]    = tab1[i].re;
                 tab1[i].im = 0;
             }
-            ff_rdft_calc(r, tab2);
+            r->rdft_calc(r, tab2);
             fft_ref(tab_ref, tab1, fft_nbits);
             tab_ref[0].im = tab_ref[fft_size_2].re;
             err = check_diff((float *)tab_ref, (float *)tab2, fft_size, 1.0);
@@ -377,7 +379,7 @@ int main(int argc, char **argv)
         break;
     case TRANSFORM_DCT:
         memcpy(tab, tab1, fft_size * sizeof(FFTComplex));
-        ff_dct_calc(d, tab);
+        d->dct_calc(d, tab);
         if (do_inverse) {
             idct_ref(tab_ref, tab1, fft_nbits);
         } else {
@@ -402,22 +404,22 @@ int main(int argc, char **argv)
                 switch (transform) {
                 case TRANSFORM_MDCT:
                     if (do_inverse) {
-                        ff_imdct_calc(m, (float *)tab, (float *)tab1);
+                        m->imdct_calc(m, (float *)tab, (float *)tab1);
                     } else {
-                        ff_mdct_calc(m, (float *)tab, (float *)tab1);
+                        m->mdct_calc(m, (float *)tab, (float *)tab1);
                     }
                     break;
                 case TRANSFORM_FFT:
                     memcpy(tab, tab1, fft_size * sizeof(FFTComplex));
-                    ff_fft_calc(s, tab);
+                    s->fft_calc(s, tab);
                     break;
                 case TRANSFORM_RDFT:
                     memcpy(tab2, tab1, fft_size * sizeof(FFTSample));
-                    ff_rdft_calc(r, tab2);
+                    r->rdft_calc(r, tab2);
                     break;
                 case TRANSFORM_DCT:
                     memcpy(tab2, tab1, fft_size * sizeof(FFTSample));
-                    ff_dct_calc(d, tab2);
+                    d->dct_calc(d, tab2);
                     break;
                 }
             }
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 58a7f30ad1..5a0f41ff6e 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -39,7 +39,14 @@ struct FFTContext {
     /* pre/post rotation tables */
     FFTSample *tcos;
     FFTSample *tsin;
+    /**
+     * Do the permutation needed BEFORE calling fft_calc().
+     */
     void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
+    /**
+     * Do a complex FFT with the parameters defined in ff_fft_init(). The
+     * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+     */
     void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
     void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
     void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
@@ -54,20 +61,13 @@ struct FFTContext {
 
 #if CONFIG_HARDCODED_TABLES
 #define COSTABLE_CONST const
-#define SINTABLE_CONST const
-#define SINETABLE_CONST const
 #else
 #define COSTABLE_CONST
-#define SINTABLE_CONST
-#define SINETABLE_CONST
 #endif
 
 #define COSTABLE(size) \
     COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_cos_##size)[size/2]
-#define SINTABLE(size) \
-    SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
-#define SINETABLE(size) \
-    SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size]
+
 extern COSTABLE(16);
 extern COSTABLE(32);
 extern COSTABLE(64);
@@ -89,20 +89,6 @@ extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17];
  */
 void ff_init_ff_cos_tabs(int index);
 
-extern SINTABLE(16);
-extern SINTABLE(32);
-extern SINTABLE(64);
-extern SINTABLE(128);
-extern SINTABLE(256);
-extern SINTABLE(512);
-extern SINTABLE(1024);
-extern SINTABLE(2048);
-extern SINTABLE(4096);
-extern SINTABLE(8192);
-extern SINTABLE(16384);
-extern SINTABLE(32768);
-extern SINTABLE(65536);
-
 /**
  * Set up a complex FFT.
  * @param nbits           log2 of the length of the input array
@@ -115,131 +101,12 @@ void ff_fft_init_mmx(FFTContext *s);
 void ff_fft_init_arm(FFTContext *s);
 void ff_dct_init_mmx(DCTContext *s);
 
-/**
- * Do the permutation needed BEFORE calling ff_fft_calc().
- */
-static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
-{
-    s->fft_permute(s, z);
-}
-/**
- * Do a complex FFT with the parameters defined in ff_fft_init(). The
- * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
- */
-static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
-{
-    s->fft_calc(s, z);
-}
 void ff_fft_end(FFTContext *s);
 
-/* MDCT computation */
-
-static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
-    s->imdct_calc(s, output, input);
-}
-static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
-{
-    s->imdct_half(s, output, input);
-}
-
-static inline void ff_mdct_calc(FFTContext *s, FFTSample *output,
-                                const FFTSample *input)
-{
-    s->mdct_calc(s, output, input);
-}
-
-/**
- * Maximum window size for ff_kbd_window_init.
- */
-#define FF_KBD_WINDOW_MAX 1024
-
-/**
- * Generate a Kaiser-Bessel Derived Window.
- * @param   window  pointer to half window
- * @param   alpha   determines window shape
- * @param   n       size of half window, max FF_KBD_WINDOW_MAX
- */
-void ff_kbd_window_init(float *window, float alpha, int n);
-
-/**
- * Generate a sine window.
- * @param   window  pointer to half window
- * @param   n       size of half window
- */
-void ff_sine_window_init(float *window, int n);
-
-/**
- * initialize the specified entry of ff_sine_windows
- */
-void ff_init_ff_sine_windows(int index);
-extern SINETABLE(  32);
-extern SINETABLE(  64);
-extern SINETABLE( 128);
-extern SINETABLE( 256);
-extern SINETABLE( 512);
-extern SINETABLE(1024);
-extern SINETABLE(2048);
-extern SINETABLE(4096);
-extern SINETABLE_CONST float * const ff_sine_windows[13];
-
 int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
 void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
 void ff_mdct_end(FFTContext *s);
 
-/* Real Discrete Fourier Transform */
-
-struct RDFTContext {
-    int nbits;
-    int inverse;
-    int sign_convention;
-
-    /* pre/post rotation tables */
-    const FFTSample *tcos;
-    SINTABLE_CONST FFTSample *tsin;
-    FFTContext fft;
-    void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
-};
-
-/**
- * Set up a real FFT.
- * @param nbits           log2 of the length of the input array
- * @param trans           the type of transform
- */
-int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
-void ff_rdft_end(RDFTContext *s);
-
-void ff_rdft_init_arm(RDFTContext *s);
-
-static av_always_inline void ff_rdft_calc(RDFTContext *s, FFTSample *data)
-{
-    s->rdft_calc(s, data);
-}
-
-/* Discrete Cosine Transform */
-
-struct DCTContext {
-    int nbits;
-    int inverse;
-    RDFTContext rdft;
-    const float *costab;
-    FFTSample *csc2;
-    void (*dct_calc)(struct DCTContext *s, FFTSample *data);
-    void (*dct32)(FFTSample *out, const FFTSample *in);
-};
-
-/**
- * Set up DCT.
- * @param nbits           size of the input array:
- *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
- *                        (1 << nbits) + 1 for DCT-I
- *
- * @note the first element of the input of DST-I is ignored
- */
-int  ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type);
-void ff_dct_calc(DCTContext *s, FFTSample *data);
-void ff_dct_end (DCTContext *s);
-
 #endif /* AVCODEC_FFT_H */
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 4cbb287a57..cb0c7102d8 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -41,6 +41,7 @@
 #include "dsputil.h"
 #include "fft.h"
 #include "libavutil/audioconvert.h"
+#include "sinewin.h"
 
 #include "imcdata.h"
 
@@ -564,8 +565,8 @@ static void imc_imdct256(IMCContext *q) {
     }
 
     /* FFT */
-    ff_fft_permute(&q->fft, q->samples);
-    ff_fft_calc (&q->fft, q->samples);
+    q->fft.fft_permute(&q->fft, q->samples);
+    q->fft.fft_calc   (&q->fft, q->samples);
 
     /* postrotation, window and reorder */
     for(i = 0; i < COEFFS/2; i++){
diff --git a/libavcodec/kbdwin.c b/libavcodec/kbdwin.c
new file mode 100644
index 0000000000..8b33861f71
--- /dev/null
+++ b/libavcodec/kbdwin.c
@@ -0,0 +1,48 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <assert.h>
+#include <libavutil/mathematics.h>
+#include "libavutil/attributes.h"
+#include "kbdwin.h"
+
+#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation
+
+av_cold void ff_kbd_window_init(float *window, float alpha, int n)
+{
+   int i, j;
+   double sum = 0.0, bessel, tmp;
+   double local_window[FF_KBD_WINDOW_MAX];
+   double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
+
+   assert(n <= FF_KBD_WINDOW_MAX);
+
+   for (i = 0; i < n; i++) {
+       tmp = i * (n - i) * alpha2;
+       bessel = 1.0;
+       for (j = BESSEL_I0_ITER; j > 0; j--)
+           bessel = bessel * tmp / (j * j) + 1;
+       sum += bessel;
+       local_window[i] = sum;
+   }
+
+   sum++;
+   for (i = 0; i < n; i++)
+       window[i] = sqrt(local_window[i] / sum);
+}
+
diff --git a/libavcodec/kbdwin.h b/libavcodec/kbdwin.h
new file mode 100644
index 0000000000..e762105628
--- /dev/null
+++ b/libavcodec/kbdwin.h
@@ -0,0 +1,35 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_KBDWIN_H
+#define AVCODEC_KBDWIN_H
+
+/**
+ * Maximum window size for ff_kbd_window_init.
+ */
+#define FF_KBD_WINDOW_MAX 1024
+
+/**
+ * Generate a Kaiser-Bessel Derived Window.
+ * @param   window  pointer to half window
+ * @param   alpha   determines window shape
+ * @param   n       size of half window, max FF_KBD_WINDOW_MAX
+ */
+void ff_kbd_window_init(float *window, float alpha, int n);
+
+#endif
diff --git a/libavcodec/mdct.c b/libavcodec/mdct.c
index 31e2216dd3..ca686b4ccb 100644
--- a/libavcodec/mdct.c
+++ b/libavcodec/mdct.c
@@ -30,33 +30,6 @@
  * MDCT/IMDCT transforms.
  */
 
-// Generate a Kaiser-Bessel Derived Window.
-#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation
-av_cold void ff_kbd_window_init(float *window, float alpha, int n)
-{
-   int i, j;
-   double sum = 0.0, bessel, tmp;
-   double local_window[FF_KBD_WINDOW_MAX];
-   double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
-
-   assert(n <= FF_KBD_WINDOW_MAX);
-
-   for (i = 0; i < n; i++) {
-       tmp = i * (n - i) * alpha2;
-       bessel = 1.0;
-       for (j = BESSEL_I0_ITER; j > 0; j--)
-           bessel = bessel * tmp / (j * j) + 1;
-       sum += bessel;
-       local_window[i] = sum;
-   }
-
-   sum++;
-   for (i = 0; i < n; i++)
-       window[i] = sqrt(local_window[i] / sum);
-}
-
-#include "mdct_tablegen.h"
-
 /**
  * init MDCT or IMDCT computation.
  */
@@ -146,7 +119,7 @@ void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
         in1 += 2;
         in2 -= 2;
     }
-    ff_fft_calc(s, z);
+    s->fft_calc(s, z);
 
     /* post rotation + reordering */
     for(k = 0; k < n8; k++) {
@@ -213,7 +186,7 @@ void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
         CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
     }
 
-    ff_fft_calc(s, x);
+    s->fft_calc(s, x);
 
     /* post rotation */
     for(i=0;i<n8;i++) {
diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 97c7855f06..4d78566463 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -33,7 +33,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "fft.h"
+#include "dct.h"
 
 #define CONFIG_AUDIO_NONSHORT 0
 
diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c
index fd8568d5ab..5ad49abceb 100644
--- a/libavcodec/nellymoserdec.c
+++ b/libavcodec/nellymoserdec.c
@@ -39,6 +39,7 @@
 #include "dsputil.h"
 #include "fft.h"
 #include "fmtconvert.h"
+#include "sinewin.h"
 
 #define ALT_BITSTREAM_READER_LE
 #include "get_bits.h"
@@ -121,7 +122,7 @@ static void nelly_decode_block(NellyMoserDecodeContext *s,
         memset(&aptr[NELLY_FILL_LEN], 0,
                (NELLY_BUF_LEN - NELLY_FILL_LEN) * sizeof(float));
 
-        ff_imdct_calc(&s->imdct_ctx, s->imdct_out, aptr);
+        s->imdct_ctx.imdct_calc(&s->imdct_ctx, s->imdct_out, aptr);
         /* XXX: overlapping and windowing should be part of a more
            generic imdct function */
         overlap_and_window(s, s->state, aptr, s->imdct_out);
diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c
index 9b792ff2ac..81a1d43e4e 100644
--- a/libavcodec/nellymoserenc.c
+++ b/libavcodec/nellymoserenc.c
@@ -39,6 +39,7 @@
 #include "avcodec.h"
 #include "dsputil.h"
 #include "fft.h"
+#include "sinewin.h"
 
 #define BITSTREAM_WRITER_LE
 #include "put_bits.h"
@@ -116,13 +117,13 @@ static void apply_mdct(NellyMoserEncodeContext *s)
     s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN);
     s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
                                NELLY_BUF_LEN);
-    ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
+    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
 
     s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN,
                        ff_sine_128, NELLY_BUF_LEN);
     s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
                                NELLY_BUF_LEN);
-    ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
+    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
 }
 
 static av_cold int encode_init(AVCodecContext *avctx)
diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index 3779b8e3aa..0d5eeac0ce 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -1588,7 +1588,7 @@ static void qdm2_calculate_fft (QDM2Context *q, int channel, int sub_packet)
     int i;
     q->fft.complex[channel][0].re *= 2.0f;
     q->fft.complex[channel][0].im = 0.0f;
-    ff_rdft_calc(&q->rdft_ctx, (FFTSample *)q->fft.complex[channel]);
+    q->rdft_ctx.rdft_calc(&q->rdft_ctx, (FFTSample *)q->fft.complex[channel]);
     /* add samples to output buffer */
     for (i = 0; i < ((q->fft_frame_size + 15) & ~15); i++)
         q->output_buffer[q->channels * i + channel] += ((float *) q->fft.complex[channel])[i] * gain;
diff --git a/libavcodec/rdft.c b/libavcodec/rdft.c
index 59b4624716..ebddd8b56b 100644
--- a/libavcodec/rdft.c
+++ b/libavcodec/rdft.c
@@ -21,7 +21,7 @@
 #include <stdlib.h>
 #include <math.h>
 #include "libavutil/mathematics.h"
-#include "fft.h"
+#include "rdft.h"
 
 /**
  * @file
@@ -65,8 +65,8 @@ static void ff_rdft_calc_c(RDFTContext* s, FFTSample* data)
     const FFTSample *tsin = s->tsin;
 
     if (!s->inverse) {
-        ff_fft_permute(&s->fft, (FFTComplex*)data);
-        ff_fft_calc(&s->fft, (FFTComplex*)data);
+        s->fft.fft_permute(&s->fft, (FFTComplex*)data);
+        s->fft.fft_calc(&s->fft, (FFTComplex*)data);
     }
     /* i=0 is a special case because of packing, the DC term is real, so we
        are going to throw the N/2 term (also real) in with it. */
@@ -91,8 +91,8 @@ static void ff_rdft_calc_c(RDFTContext* s, FFTSample* data)
     if (s->inverse) {
         data[0] *= k1;
         data[1] *= k1;
-        ff_fft_permute(&s->fft, (FFTComplex*)data);
-        ff_fft_calc(&s->fft, (FFTComplex*)data);
+        s->fft.fft_permute(&s->fft, (FFTComplex*)data);
+        s->fft.fft_calc(&s->fft, (FFTComplex*)data);
     }
 }
 
diff --git a/libavcodec/rdft.h b/libavcodec/rdft.h
new file mode 100644
index 0000000000..cf50aba39c
--- /dev/null
+++ b/libavcodec/rdft.h
@@ -0,0 +1,74 @@
+/*
+ * (I)RDFT transforms
+ * Copyright (c) 2009 Alex Converse <alex dot converse at gmail dot com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_RDFT_H
+#define AVCODEC_RDFT_H
+
+#include "config.h"
+#include "fft.h"
+
+#if CONFIG_HARDCODED_TABLES
+#   define SINTABLE_CONST const
+#else
+#   define SINTABLE_CONST
+#endif
+
+#define SINTABLE(size) \
+    SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
+
+extern SINTABLE(16);
+extern SINTABLE(32);
+extern SINTABLE(64);
+extern SINTABLE(128);
+extern SINTABLE(256);
+extern SINTABLE(512);
+extern SINTABLE(1024);
+extern SINTABLE(2048);
+extern SINTABLE(4096);
+extern SINTABLE(8192);
+extern SINTABLE(16384);
+extern SINTABLE(32768);
+extern SINTABLE(65536);
+
+struct RDFTContext {
+    int nbits;
+    int inverse;
+    int sign_convention;
+
+    /* pre/post rotation tables */
+    const FFTSample *tcos;
+    SINTABLE_CONST FFTSample *tsin;
+    FFTContext fft;
+    void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
+};
+
+/**
+ * Set up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
+void ff_rdft_end(RDFTContext *s);
+
+void ff_rdft_init_arm(RDFTContext *s);
+
+
+#endif
diff --git a/libavcodec/sinewin.c b/libavcodec/sinewin.c
new file mode 100644
index 0000000000..1fa0e953f0
--- /dev/null
+++ b/libavcodec/sinewin.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "sinewin.h"
+#include "sinewin_tablegen.h"
diff --git a/libavcodec/sinewin.h b/libavcodec/sinewin.h
new file mode 100644
index 0000000000..33f7d68b40
--- /dev/null
+++ b/libavcodec/sinewin.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2008 Robert Swain
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SINEWIN_H
+#define AVCODEC_SINEWIN_H
+
+#include "config.h"
+#include "libavutil/mem.h"
+
+#if CONFIG_HARDCODED_TABLES
+#   define SINETABLE_CONST const
+#else
+#   define SINETABLE_CONST
+#endif
+
+#define SINETABLE(size) \
+    SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size]
+
+/**
+ * Generate a sine window.
+ * @param   window  pointer to half window
+ * @param   n       size of half window
+ */
+void ff_sine_window_init(float *window, int n);
+
+/**
+ * initialize the specified entry of ff_sine_windows
+ */
+void ff_init_ff_sine_windows(int index);
+
+extern SINETABLE(  32);
+extern SINETABLE(  64);
+extern SINETABLE( 128);
+extern SINETABLE( 256);
+extern SINETABLE( 512);
+extern SINETABLE(1024);
+extern SINETABLE(2048);
+extern SINETABLE(4096);
+
+extern SINETABLE_CONST float * const ff_sine_windows[13];
+
+#endif
diff --git a/libavcodec/mdct_tablegen.c b/libavcodec/sinewin_tablegen.c
similarity index 94%
rename from libavcodec/mdct_tablegen.c
rename to libavcodec/sinewin_tablegen.c
index 6205f06e3c..48eb771e48 100644
--- a/libavcodec/mdct_tablegen.c
+++ b/libavcodec/sinewin_tablegen.c
@@ -1,5 +1,5 @@
 /*
- * Generate a header file for hardcoded MDCT tables
+ * Generate a header file for hardcoded sine windows
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
@@ -29,7 +29,7 @@
 #ifndef M_PI
 #define M_PI 3.14159265358979323846
 #endif
-#include "mdct_tablegen.h"
+#include "sinewin_tablegen.h"
 #include "tableprint.h"
 
 int main(void)
diff --git a/libavcodec/mdct_tablegen.h b/libavcodec/sinewin_tablegen.h
similarity index 95%
rename from libavcodec/mdct_tablegen.h
rename to libavcodec/sinewin_tablegen.h
index 51a0094221..1c85d79f2f 100644
--- a/libavcodec/mdct_tablegen.h
+++ b/libavcodec/sinewin_tablegen.h
@@ -1,5 +1,5 @@
 /*
- * Header file for hardcoded MDCT tables
+ * Header file for hardcoded sine windows
  *
  * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
  *
@@ -36,7 +36,7 @@ SINETABLE(1024);
 SINETABLE(2048);
 SINETABLE(4096);
 #else
-#include "libavcodec/mdct_tables.h"
+#include "libavcodec/sinewin_tables.h"
 #endif
 
 SINETABLE_CONST float * const ff_sine_windows[] = {
diff --git a/libavcodec/synth_filter.c b/libavcodec/synth_filter.c
index 4af496d372..5f10530974 100644
--- a/libavcodec/synth_filter.c
+++ b/libavcodec/synth_filter.c
@@ -29,7 +29,7 @@ static void synth_filter_float(FFTContext *imdct,
     float *synth_buf= synth_buf_ptr + *synth_buf_offset;
     int i, j;
 
-    ff_imdct_half(imdct, synth_buf, in);
+    imdct->imdct_half(imdct, synth_buf, in);
 
     for (i = 0; i < 16; i++){
         float a= synth_buf2[i     ];
diff --git a/libavcodec/twinvq.c b/libavcodec/twinvq.c
index bd43104e6e..857ca5bae5 100644
--- a/libavcodec/twinvq.c
+++ b/libavcodec/twinvq.c
@@ -24,6 +24,7 @@
 #include "dsputil.h"
 #include "fft.h"
 #include "lsp.h"
+#include "sinewin.h"
 
 #include <math.h>
 #include <stdint.h>
@@ -608,6 +609,7 @@ static void dec_lpc_spectrum_inv(TwinContext *tctx, float *lsp,
 static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype,
                             float *in, float *prev, int ch)
 {
+    FFTContext *mdct = &tctx->mdct_ctx[ftype];
     const ModeTab *mtab = tctx->mtab;
     int bsize = mtab->size / mtab->fmode[ftype].sub;
     int size  = mtab->size;
@@ -640,7 +642,7 @@ static void imdct_and_window(TwinContext *tctx, enum FrameType ftype, int wtype,
 
         wsize = types_sizes[wtype_to_wsize[sub_wtype]];
 
-        ff_imdct_half(&tctx->mdct_ctx[ftype], buf1 + bsize*j, in + bsize*j);
+        mdct->imdct_half(mdct, buf1 + bsize*j, in + bsize*j);
 
         tctx->dsp.vector_fmul_window(out2,
                                      prev_buf + (bsize-wsize)/2,
diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index deaae7576b..637f510df4 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -1448,7 +1448,7 @@ void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
 static int vorbis_parse_audio_packet(vorbis_context *vc)
 {
     GetBitContext *gb = &vc->gb;
-
+    FFTContext *mdct;
     uint_fast8_t previous_window = vc->previous_window;
     uint_fast8_t mode_number;
     uint_fast8_t blockflag;
@@ -1552,11 +1552,13 @@ static int vorbis_parse_audio_packet(vorbis_context *vc)
 
 // Dotproduct, MDCT
 
+    mdct = &vc->mdct[blockflag];
+
     for (j = vc->audio_channels-1;j >= 0; j--) {
         ch_floor_ptr = vc->channel_floors   + j           * blocksize / 2;
         ch_res_ptr   = vc->channel_residues + res_chan[j] * blocksize / 2;
         vc->dsp.vector_fmul(ch_floor_ptr, ch_floor_ptr, ch_res_ptr, blocksize / 2);
-        ff_imdct_half(&vc->mdct[blockflag], ch_res_ptr, ch_floor_ptr);
+        mdct->imdct_half(mdct, ch_res_ptr, ch_floor_ptr);
     }
 
 // Overlap/add, save data for next overlapping  FPMATH
diff --git a/libavcodec/vorbis_enc.c b/libavcodec/vorbis_enc.c
index 93683ac422..7dae4cb2b0 100644
--- a/libavcodec/vorbis_enc.c
+++ b/libavcodec/vorbis_enc.c
@@ -935,7 +935,7 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, const signed short *a
     }
 
     for (channel = 0; channel < venc->channels; channel++)
-        ff_mdct_calc(&venc->mdct[0], venc->coeffs + channel * window_len,
+        venc->mdct[0].mdct_calc(&venc->mdct[0], venc->coeffs + channel * window_len,
                      venc->samples + channel * window_len * 2);
 
     if (samples) {
diff --git a/libavcodec/wma.c b/libavcodec/wma.c
index a7eacb8c78..67599b7eab 100644
--- a/libavcodec/wma.c
+++ b/libavcodec/wma.c
@@ -20,6 +20,7 @@
  */
 
 #include "avcodec.h"
+#include "sinewin.h"
 #include "wma.h"
 #include "wmadata.h"
 
diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c
index 83f8dea8bb..5382999cb0 100644
--- a/libavcodec/wmadec.c
+++ b/libavcodec/wmadec.c
@@ -447,6 +447,7 @@ static int wma_decode_block(WMACodecContext *s)
     int coef_nb_bits, total_gain;
     int nb_coefs[MAX_CHANNELS];
     float mdct_norm;
+    FFTContext *mdct;
 
 #ifdef TRACE
     tprintf(s->avctx, "***decode_block: %d:%d\n", s->frame_count - 1, s->block_num);
@@ -742,12 +743,14 @@ static int wma_decode_block(WMACodecContext *s)
     }
 
 next:
+    mdct = &s->mdct_ctx[bsize];
+
     for(ch = 0; ch < s->nb_channels; ch++) {
         int n4, index;
 
         n4 = s->block_len / 2;
         if(s->channel_coded[ch]){
-            ff_imdct_calc(&s->mdct_ctx[bsize], s->output, s->coefs[ch]);
+            mdct->imdct_calc(mdct, s->output, s->coefs[ch]);
         }else if(!(s->ms_stereo && ch==1))
             memset(s->output, 0, sizeof(s->output));
 
diff --git a/libavcodec/wmaenc.c b/libavcodec/wmaenc.c
index 4e54a70161..9112918382 100644
--- a/libavcodec/wmaenc.c
+++ b/libavcodec/wmaenc.c
@@ -77,6 +77,7 @@ static int encode_init(AVCodecContext * avctx){
 static void apply_window_and_mdct(AVCodecContext * avctx, const signed short * audio, int len) {
     WMACodecContext *s = avctx->priv_data;
     int window_index= s->frame_len_bits - s->block_len_bits;
+    FFTContext *mdct = &s->mdct_ctx[window_index];
     int i, j, channel;
     const float * win = s->windows[window_index];
     int window_len = 1 << s->block_len_bits;
@@ -89,7 +90,7 @@ static void apply_window_and_mdct(AVCodecContext * avctx, const signed short * a
             s->output[i+window_len]  = audio[j] / n * win[window_len - i - 1];
             s->frame_out[channel][i] = audio[j] / n * win[i];
         }
-        ff_mdct_calc(&s->mdct_ctx[window_index], s->coefs[channel], s->output);
+        mdct->mdct_calc(mdct, s->coefs[channel], s->output);
     }
 }
 
diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
index d248ae4ac0..b207e18b2f 100644
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c
@@ -92,6 +92,7 @@
 #include "put_bits.h"
 #include "wmaprodata.h"
 #include "dsputil.h"
+#include "sinewin.h"
 #include "wma.h"
 
 /** current decoder limitations */
@@ -1222,6 +1223,7 @@ static int decode_subframe(WMAProDecodeCtx *s)
             get_bits_count(&s->gb) - s->subframe_offset);
 
     if (transmit_coeffs) {
+        FFTContext *mdct = &s->mdct_ctx[av_log2(subframe_len) - WMAPRO_BLOCK_MIN_BITS];
         /** reconstruct the per channel data */
         inverse_channel_transform(s);
         for (i = 0; i < s->channels_for_cur_subframe; i++) {
@@ -1246,9 +1248,8 @@ static int decode_subframe(WMAProDecodeCtx *s)
                                           quant, end - start);
             }
 
-            /** apply imdct (ff_imdct_half == DCTIV with reverse) */
-            ff_imdct_half(&s->mdct_ctx[av_log2(subframe_len) - WMAPRO_BLOCK_MIN_BITS],
-                          s->channel[c].coeffs, s->tmp);
+            /** apply imdct (imdct_half == DCTIV with reverse) */
+            mdct->imdct_half(mdct, s->channel[c].coeffs, s->tmp);
         }
     }
 
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 13011ec234..dfdfabbbb3 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -36,8 +36,9 @@
 #include "acelp_filters.h"
 #include "lsp.h"
 #include "libavutil/lzo.h"
-#include "avfft.h"
-#include "fft.h"
+#include "dct.h"
+#include "rdft.h"
+#include "sinewin.h"
 
 #define MAX_BLOCKS           8   ///< maximum number of blocks per frame
 #define MAX_LSPS             16  ///< maximum filter order
@@ -558,7 +559,7 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs,
     int n, idx;
 
     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
-    ff_rdft_calc(&s->rdft, lpcs);
+    s->rdft.rdft_calc(&s->rdft, lpcs);
 #define log_range(var, assign) do { \
         float tmp = log10f(assign);  var = tmp; \
         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
@@ -601,8 +602,8 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs,
      * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
      * "moment" of the LPCs in this filter. */
-    ff_dct_calc(&s->dct, lpcs);
-    ff_dct_calc(&s->dst, lpcs);
+    s->dct.dct_calc(&s->dct, lpcs);
+    s->dst.dct_calc(&s->dst, lpcs);
 
     /* Split out the coefficient indexes into phase/magnitude pairs */
     idx = 255 + av_clip(lpcs[64],               -255, 255);
@@ -623,7 +624,7 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs,
     coeffs[1] = last_coeff;
 
     /* move into real domain */
-    ff_rdft_calc(&s->irdft, coeffs);
+    s->irdft.rdft_calc(&s->irdft, coeffs);
 
     /* tilt correction and normalize scale */
     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
@@ -693,8 +694,8 @@ static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
         /* apply coefficients (in frequency spectrum domain), i.e. complex
          * number multiplication */
         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
-        ff_rdft_calc(&s->rdft, synth_pf);
-        ff_rdft_calc(&s->rdft, coeffs);
+        s->rdft.rdft_calc(&s->rdft, synth_pf);
+        s->rdft.rdft_calc(&s->rdft, coeffs);
         synth_pf[0] *= coeffs[0];
         synth_pf[1] *= coeffs[1];
         for (n = 1; n < 64; n++) {
@@ -702,7 +703,7 @@ static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
         }
-        ff_rdft_calc(&s->irdft, synth_pf);
+        s->irdft.rdft_calc(&s->irdft, synth_pf);
     }
 
     /* merge filter output with the history of previous runs */
diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
index 5ca341d5e4..37081d67fc 100644
--- a/libavcodec/x86/fft.c
+++ b/libavcodec/x86/fft.c
@@ -18,6 +18,7 @@
 
 #include "libavutil/cpu.h"
 #include "libavcodec/dsputil.h"
+#include "libavcodec/dct.h"
 #include "fft.h"
 
 av_cold void ff_fft_init_mmx(FFTContext *s)