Added FreeSurround to Externals

Also cleaned up its source code to support only 5.1 and 7.1 setups.
2025-09-12 22:42:53 -06:00 · 2017-08-09 16:55:43 -03:00
parent 950b952aee
commit 7b9375875c
15 changed files with 2834 additions and 0 deletions
--- a/Externals/FreeSurround/source/ChannelMaps.cpp
+++ b/Externals/FreeSurround/source/ChannelMaps.cpp
--- a/Externals/FreeSurround/source/FreeSurroundDecoder.cpp
+++ b/Externals/FreeSurround/source/FreeSurroundDecoder.cpp
@ -0,0 +1,310 @@
+/*
+Copyright (C) 2007-2010 Christian Kothe
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+*/
+
+#include "FreeSurround/FreeSurroundDecoder.h"
+#include "FreeSurround/ChannelMaps.h"
+#include <cmath>
+
+#undef min
+#undef max
+
+// FreeSurround implementation
+// DPL2FSDecoder::Init() must be called before using the decoder.
+//
+// The constructor only puts the object into a well-defined "not initialized"
+// state. The two FFT plan pointers are nulled here because the destructor
+// releases them unconditionally: without this, destroying a decoder on which
+// Init() was never called would free indeterminate pointers.
+DPL2FSDecoder::DPL2FSDecoder() {
+  initialized = false;
+  buffer_empty = true;
+  forward = nullptr;
+  inverse = nullptr;
+}
+
+// Releases the two FFT plans created by Init().
+//
+// kiss_fftr_alloc() obtains its storage with `new char[...]` when it is not
+// given a caller-provided buffer (which is how Init() calls it), so the
+// matching release is `delete[]` on a char pointer. A scalar `delete` through
+// the incomplete kiss_fftr_state type does not match that allocation, which is
+// undefined behaviour (MSVC warning C4150 was being suppressed for it).
+// delete[] on a null pointer is a no-op.
+DPL2FSDecoder::~DPL2FSDecoder() {
+  delete[] reinterpret_cast<char *>(forward);
+  delete[] reinterpret_cast<char *>(inverse);
+}
+
+// Prepares the decoder for a speaker layout, block size and sample rate.
+// Only the first call has any effect: once `initialized` is set, later calls
+// return without touching the existing state.
+void DPL2FSDecoder::Init(channel_setup chsetup, unsigned int blsize,
+                         unsigned int sample_rate) {
+  if (initialized)
+    return;
+
+  setup = chsetup;
+  N = blsize;
+  samplerate = sample_rate;
+
+  // Time-domain working buffers: the analysis/synthesis window, the
+  // interleaved stereo input (3 * N floats), the windowed left/right blocks
+  // and the destination of the inverse transform.
+  wnd = std::vector<double>(N);
+  inbuf = std::vector<float>(3 * N);
+  lt = std::vector<double>(N);
+  rt = std::vector<double>(N);
+  dst = std::vector<double>(N);
+
+  // Half-spectra of the two input channels (N / 2 + 1 bins each) and the
+  // forward / inverse real-FFT plans of length N.
+  lf = std::vector<cplx>(N / 2 + 1);
+  rf = std::vector<cplx>(N / 2 + 1);
+  forward = kiss_fftr_alloc(N, 0, 0, 0);
+  inverse = kiss_fftr_alloc(N, 1, 0, 0);
+
+  // The number of output channels comes from the channel map of this setup.
+  C = static_cast<unsigned int>(chn_alloc[setup].size());
+
+  // Interleaved output (one and a half blocks per channel) and one spectrum
+  // per output channel.
+  outbuf.resize((N + N / 2) * C);
+  signal.resize(C, std::vector<cplx>(N));
+
+  // Fill in the window function.
+  for (unsigned int i = 0; i < N; i++)
+    wnd[i] = sqrt(0.5 * (1 - cos(2 * pi * i / N)) / N);
+
+  // Default soundfield and rendering parameters. The two cutoffs are passed
+  // as frequency / samplerate * 2.
+  set_circular_wrap(90);
+  set_shift(0);
+  set_depth(1);
+  set_focus(0);
+  set_center_image(1);
+  set_front_separation(1);
+  set_rear_separation(1);
+  set_low_cutoff(40.0f / samplerate * 2);
+  set_high_cutoff(90.0f / samplerate * 2);
+  set_bass_redirection(false);
+
+  initialized = true;
+}
+
+// Decodes one chunk of interleaved stereo input and returns a pointer to the
+// decoder's internal multichannel output buffer (the output is lagged).
+// `input` is read as 8 * N bytes, i.e. 2 * N floats.
+// Returns a null pointer when Init() has not been called yet.
+float *DPL2FSDecoder::decode(float *input) {
+  if (!initialized)
+    return 0;
+
+  // The new chunk goes behind the N floats kept from the previous call.
+  memcpy(&inbuf[N], &input[0], 8 * N);
+
+  // Two overlapping passes: one starting at the retained data, one starting
+  // at the freshly appended data.
+  buffered_decode(&inbuf[0]);
+  buffered_decode(&inbuf[N]);
+
+  // Keep the last N floats at the front so the next call can overlap with
+  // them.
+  memcpy(&inbuf[0], &inbuf[2 * N], 4 * N);
+  buffer_empty = false;
+
+  return &outbuf[0];
+}
+
+// flush the internal buffers
+//
+// Zeroes every sample of the output and input buffers and marks the decoder
+// as holding no data. vector::assign is used instead of memset on &buf[0] so
+// that calling flush() before Init() (while both vectors are still empty) is
+// well defined, and so that no element size has to be hard-coded. Assigning
+// the current size never reallocates, so pointers previously returned by
+// decode() keep referring to the same storage.
+void DPL2FSDecoder::flush() {
+  outbuf.assign(outbuf.size(), 0.0f);
+  inbuf.assign(inbuf.size(), 0.0f);
+  buffer_empty = true;
+}
+
+// Number of samples currently held in the buffer: zero after construction or
+// flush(), half a block (N / 2) once decode() has run.
+unsigned int DPL2FSDecoder::buffered() {
+  if (buffer_empty)
+    return 0;
+  return N / 2;
+}
+
+// Soundfield & rendering parameter setters. Each one stores its argument in
+// the corresponding member; nothing is recomputed at this point.
+void DPL2FSDecoder::set_circular_wrap(float v) {
+  circular_wrap = v;
+}
+void DPL2FSDecoder::set_shift(float v) {
+  shift = v;
+}
+void DPL2FSDecoder::set_depth(float v) {
+  depth = v;
+}
+void DPL2FSDecoder::set_focus(float v) {
+  focus = v;
+}
+void DPL2FSDecoder::set_center_image(float v) {
+  center_image = v;
+}
+void DPL2FSDecoder::set_front_separation(float v) {
+  front_separation = v;
+}
+void DPL2FSDecoder::set_rear_separation(float v) {
+  rear_separation = v;
+}
+// The two cutoffs are scaled by N / 2 before being stored, so N must already
+// hold the block size when they are called.
+void DPL2FSDecoder::set_low_cutoff(float v) {
+  lo_cut = v * (N / 2);
+}
+void DPL2FSDecoder::set_high_cutoff(float v) {
+  hi_cut = v * (N / 2);
+}
+void DPL2FSDecoder::set_bass_redirection(bool v) {
+  use_lfe = v;
+}
+
+// helper functions
+// Several of these narrow their double result to float. min() and max() are
+// DPL2FSDecoder members; any min/max macros are #undef'd at the top of this
+// file.
+// square of x, narrowed to float
+inline float DPL2FSDecoder::sqr(double x) { return static_cast<float>(x * x); }
+// magnitude of a complex value
+inline double DPL2FSDecoder::amplitude(const cplx &x) {
+  return sqrt(sqr(x.real()) + sqr(x.imag()));
+}
+// argument (angle) of a complex value, as returned by atan2
+inline double DPL2FSDecoder::phase(const cplx &x) {
+  return atan2(x.imag(), x.real());
+}
+// build a complex value from magnitude a and angle p
+inline cplx DPL2FSDecoder::polar(double a, double p) {
+  return cplx(a * cos(p), a * sin(p));
+}
+// smaller of a and b, narrowed to float
+inline float DPL2FSDecoder::min(double a, double b) {
+  return static_cast<float>(a < b ? a : b);
+}
+// larger of a and b, narrowed to float
+inline float DPL2FSDecoder::max(double a, double b) {
+  return static_cast<float>(a > b ? a : b);
+}
+// limit x to the range [-1, 1]
+inline float DPL2FSDecoder::clamp(double x) { return max(-1, min(1, x)); }
+// -1, 0 or 1 depending on the sign of x
+inline float DPL2FSDecoder::sign(double x) {
+  return static_cast<float>(x < 0 ? -1 : (x > 0 ? 1 : 0));
+}
+// get the distance of the soundfield edge, along a given angle
+// (the smaller of sqrt(1 + tan(a)^2) and sqrt(1 + (1 / tan(a))^2))
+inline double DPL2FSDecoder::edgedistance(double a) {
+  return min(sqrt(1 + sqr(tan(a))), sqrt(1 + sqr(1 / tan(a))));
+}
+// Maps a coordinate onto the piecewise-linear channel allocation grid.
+// On entry x is a position (callers pass values clamped to [-1, 1]); on exit
+// x holds the fractional offset inside the selected grid cell. The return
+// value is the cell index, capped at grid_res - 2 so that index + 1 is still
+// a valid grid point.
+int DPL2FSDecoder::map_to_grid(double &x) {
+  const double grid_pos = ((x + 1) * 0.5) * (grid_res - 1);
+  const double cell = min(grid_res - 2, floor(grid_pos));
+  x = grid_pos - cell;
+  return static_cast<int>(cell);
+}
+
+// decode a block of data and overlap-add it into outbuf
+//
+// input: 2 * N interleaved stereo floats (left at even, right at odd indexes).
+// The block is windowed, transformed to the frequency domain, steered bin by
+// bin into the C output channels, transformed back and overlap-added into the
+// interleaved output buffer.
+//
+// NOTE(review): bins of the last channel (signal[C - 1]) are only written
+// while use_lfe is set and f < hi_cut; switching bass redirection off does
+// not clear values written earlier - confirm this is intended.
+void DPL2FSDecoder::buffered_decode(float *input) {
+  // demultiplex and apply window function
+  for (unsigned int k = 0; k < N; k++) {
+    lt[k] = wnd[k] * input[k * 2 + 0];
+    rt[k] = wnd[k] * input[k * 2 + 1];
+  }
+
+  // map into spectral domain
+  kiss_fftr(forward, &lt[0], (kiss_fft_cpx *)&lf[0]);
+  kiss_fftr(forward, &rt[0], (kiss_fft_cpx *)&rf[0]);
+
+  // compute multichannel output signal in the spectral domain
+  // (bin 0 and bin N / 2 are skipped)
+  for (unsigned int f = 1; f < N / 2; f++) {
+    // get Lt/Rt amplitudes & phases
+    double ampL = amplitude(lf[f]), ampR = amplitude(rf[f]);
+    double phaseL = phase(lf[f]), phaseR = phase(rf[f]);
+    // calculate the amplitude & phase differences
+    double ampDiff =
+        clamp((ampL + ampR < epsilon) ? 0 : (ampR - ampL) / (ampR + ampL));
+    // std::abs is spelled out: an unqualified abs() can resolve to the C
+    // int abs(int) and silently truncate the phase difference.
+    double phaseDiff = std::abs(phaseL - phaseR);
+    if (phaseDiff > pi)
+      phaseDiff = 2 * pi - phaseDiff;
+
+    // decode into x/y soundfield position
+    double x, y;
+    transform_decode(ampDiff, phaseDiff, x, y);
+    // add wrap control
+    transform_circular_wrap(x, y, circular_wrap);
+    // add shift control
+    y = clamp(y - shift);
+    // add depth control
+    y = clamp(1 - (1 - y) * depth);
+    // add focus control
+    transform_focus(x, y, focus);
+    // add crossfeed control
+    x = clamp(x *
+              (front_separation * (1 + y) / 2 + rear_separation * (1 - y) / 2));
+
+    // get total signal amplitude
+    double amp_total = sqrt(ampL * ampL + ampR * ampR);
+    // and total L/C/R signal phases
+    double phase_of[] = {
+        phaseL, atan2(lf[f].imag() + rf[f].imag(), lf[f].real() + rf[f].real()),
+        phaseR};
+    // compute 2d channel map indexes p/q and update x/y to fractional offsets
+    // in the map grid
+    int p = map_to_grid(x), q = map_to_grid(y);
+    // map position to channel volumes
+    for (unsigned int c = 0; c < C - 1; c++) {
+      // look up channel map at respective position (with bilinear
+      // interpolation) and build the
+      // signal
+      std::vector<float *> &a = chn_alloc[setup][c];
+      signal[c][f] = polar(
+          amp_total * ((1 - x) * (1 - y) * a[q][p] + x * (1 - y) * a[q][p + 1] +
+                       (1 - x) * y * a[q + 1][p] + x * y * a[q + 1][p + 1]),
+          phase_of[1 + static_cast<int>(sign(chn_xsf[setup][c]))]);
+    }
+
+    // optionally redirect bass
+    if (use_lfe && f < hi_cut) {
+      // level of LFE channel according to normalized frequency
+      double lfe_level =
+          f < lo_cut ? 1
+                     : 0.5 * (1 + cos(pi * (f - lo_cut) / (hi_cut - lo_cut)));
+      // assign LFE channel
+      signal[C - 1][f] = lfe_level * polar(amp_total, phase_of[1]);
+      // subtract the signal from the other channels
+      for (unsigned int c = 0; c < C - 1; c++)
+        signal[c][f] *= (1 - lfe_level);
+    }
+  }
+
+  // shift the last 2/3 to the first 2/3 of the output buffer
+  // (source and destination overlap, so this has to be memmove, not memcpy)
+  memmove(&outbuf[0], &outbuf[C * N / 2], N * C * sizeof(float));
+  // and clear the rest
+  memset(&outbuf[C * N], 0, (N / 2) * C * sizeof(float));
+  // backtransform each channel and overlap-add
+  for (unsigned int c = 0; c < C; c++) {
+    // back-transform into time domain
+    kiss_fftri(inverse, (kiss_fft_cpx *)&signal[c][0], &dst[0]);
+    // add the result to the last 2/3 of the output buffer, windowed (and
+    // remultiplex)
+    for (unsigned int k = 0; k < N; k++)
+      outbuf[C * (k + N / 2) + c] += static_cast<float>(wnd[k] * dst[k]);
+  }
+}
+
+// transform amp/phase difference space into x/y soundfield space
+//
+// a: amplitude difference; buffered_decode() passes a value clamped to
+//    [-1, 1].
+// p: phase difference; buffered_decode() passes a value folded into [0, pi].
+// x, y: outputs, each a fixed polynomial in a and p, clamped to [-1, 1].
+// The x polynomial contains only odd powers of a and the y polynomial only
+// even powers of a, so negating a negates x (before clamping) and leaves y
+// unchanged. The powers are written as repeated multiplications; the
+// derivation of the coefficients is not shown in this file.
+void DPL2FSDecoder::transform_decode(double a, double p, double &x, double &y) {
+  x = clamp(1.0047 * a + 0.46804 * a * p * p * p - 0.2042 * a * p * p * p * p +
+            0.0080586 * a * p * p * p * p * p * p * p -
+            0.0001526 * a * p * p * p * p * p * p * p * p * p * p -
+            0.073512 * a * a * a * p - 0.2499 * a * a * a * p * p * p * p +
+            0.016932 * a * a * a * p * p * p * p * p * p * p -
+            0.00027707 * a * a * a * p * p * p * p * p * p * p * p * p * p +
+            0.048105 * a * a * a * a * a * p * p * p * p * p * p * p -
+            0.0065947 * a * a * a * a * a * p * p * p * p * p * p * p * p * p *
+                p +
+            0.0016006 * a * a * a * a * a * p * p * p * p * p * p * p * p * p *
+                p * p -
+            0.0071132 * a * a * a * a * a * a * a * p * p * p * p * p * p * p *
+                p * p +
+            0.0022336 * a * a * a * a * a * a * a * p * p * p * p * p * p * p *
+                p * p * p * p -
+            0.0004804 * a * a * a * a * a * a * a * p * p * p * p * p * p * p *
+                p * p * p * p * p);
+  y = clamp(
+      0.98592 - 0.62237 * p + 0.077875 * p * p - 0.0026929 * p * p * p * p * p +
+      0.4971 * a * a * p - 0.00032124 * a * a * p * p * p * p * p * p +
+      9.2491e-006 * a * a * a * a * p * p * p * p * p * p * p * p * p * p +
+      0.051549 * a * a * a * a * a * a * a * a +
+      1.0727e-014 * a * a * a * a * a * a * a * a * a * a);
+}
+
+// apply a circular_wrap transformation to some position
+//
+// x, y: soundfield position, updated in place (results are clamped to
+//       [-1, 1]).
+// refangle: wrap angle in degrees; exactly 90 leaves the position unchanged.
+//
+// std::abs is spelled out below: an unqualified abs() on a double can resolve
+// to the C int abs(int) and truncate the angle to a whole number of radians.
+void DPL2FSDecoder::transform_circular_wrap(double &x, double &y,
+                                            double refangle) {
+  if (refangle == 90)
+    return;
+  refangle = refangle * pi / 180;
+  double baseangle = 90 * pi / 180;
+  // translate into edge-normalized polar coordinates
+  double ang = atan2(x, y), len = sqrt(x * x + y * y);
+  len = len / edgedistance(ang);
+  // apply circular_wrap transform
+  if (std::abs(ang) < baseangle / 2)
+    // angle falls within the front region (to be enlarged)
+    ang *= refangle / baseangle;
+  else
+    // angle falls within the rear region (to be shrunken)
+    ang = pi - (-(((refangle - 2 * pi) * (pi - std::abs(ang)) * sign(ang)) /
+                  (2 * pi - baseangle)));
+  // translate back into soundfield position
+  len = len * edgedistance(ang);
+  x = clamp(sin(ang) * len);
+  y = clamp(cos(ang) * len);
+}
+
+// Applies the focus control to a soundfield position.
+// x and y are updated in place and end up clamped to [-1, 1]; a focus of
+// exactly 0 leaves them untouched.
+void DPL2FSDecoder::transform_focus(double &x, double &y, double focus) {
+  if (focus == 0)
+    return;
+
+  // express the position as an angle plus an edge-normalized radius
+  const double angle = atan2(x, y);
+  const double edge = edgedistance(angle);
+  double radius = clamp(sqrt(x * x + y * y) / edge);
+
+  // bend the radius; positive and negative focus use different curves
+  if (focus > 0)
+    radius = 1 - pow(1 - radius, 1 + focus * 20);
+  else
+    radius = pow(radius, 1 - focus * 20);
+
+  // convert back into a Euclidean soundfield position
+  radius = radius * edge;
+  x = clamp(sin(angle) * radius);
+  y = clamp(cos(angle) * radius);
+}
--- a/Externals/FreeSurround/source/KissFFT.cpp
+++ b/Externals/FreeSurround/source/KissFFT.cpp
@ -0,0 +1,444 @@
+/*
+Copyright (c) 2003-2010, Mark Borgerding
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted
+provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+this list of conditions
+and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+this list of
+conditions and the following disclaimer in the documentation and/or other
+materials provided with
+the distribution.
+    * Neither the author nor the names of any contributors may be used to
+endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF
+THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "FreeSurround/_KissFFTGuts.h"
+/* The guts header contains all the multiplication and addition macros that are
+ defined for
+ fixed or floating point complex numbers.  It also declares the kf_ internal
+ functions.
+ */
+
+// Radix-2 butterfly: combines, in place, the m elements starting at Fout with
+// the m elements starting at Fout + m.
+//   Fout    - first of the 2 * m values being combined
+//   fstride - step through st->twiddles between successive butterflies
+//   st      - FFT state supplying the twiddle table
+//   m       - number of butterflies; must be >= 1 because the loop is a
+//             do/while that tests --m
+// The C_* operations are macros from _KissFFTGuts.h (not shown here).
+static void kf_bfly2(kiss_fft_cpx *Fout, const size_t fstride,
+                     const kiss_fft_cfg st, int m) {
+  kiss_fft_cpx *Fout2;
+  kiss_fft_cpx *tw1 = st->twiddles;
+  kiss_fft_cpx t;
+  Fout2 = Fout + m;
+  do {
+    C_FIXDIV(*Fout, 2);
+    C_FIXDIV(*Fout2, 2);
+
+    // t = second-half element times the current twiddle
+    C_MUL(t, *Fout2, *tw1);
+    tw1 += fstride;
+    // second half receives the difference, first half the sum
+    C_SUB(*Fout2, *Fout, t);
+    C_ADDTO(*Fout, t);
+    ++Fout2;
+    ++Fout;
+  } while (--m);
+}
+
+// Radix-4 butterfly: combines, in place, four groups of m elements located at
+// Fout, Fout + m, Fout + 2 * m and Fout + 3 * m.
+//   fstride - base step through st->twiddles; the three twiddle pointers
+//             advance by 1x, 2x and 3x this step per iteration
+//   st      - FFT state; st->inverse selects which of the two mirrored
+//             real/imaginary combinations is written to outputs m and 3 * m
+//   m       - number of butterflies; must be >= 1 (do/while on --k)
+// The C_* operations are macros from _KissFFTGuts.h (not shown here).
+static void kf_bfly4(kiss_fft_cpx *Fout, const size_t fstride,
+                     const kiss_fft_cfg st, const size_t m) {
+  kiss_fft_cpx *tw1, *tw2, *tw3;
+  kiss_fft_cpx scratch[6];
+  size_t k = m;
+  const size_t m2 = 2 * m;
+  const size_t m3 = 3 * m;
+
+  tw3 = tw2 = tw1 = st->twiddles;
+
+  do {
+    C_FIXDIV(*Fout, 4);
+    C_FIXDIV(Fout[m], 4);
+    C_FIXDIV(Fout[m2], 4);
+    C_FIXDIV(Fout[m3], 4);
+
+    // apply the twiddles to inputs 1..3
+    C_MUL(scratch[0], Fout[m], *tw1);
+    C_MUL(scratch[1], Fout[m2], *tw2);
+    C_MUL(scratch[2], Fout[m3], *tw3);
+
+    C_SUB(scratch[5], *Fout, scratch[1]);
+    C_ADDTO(*Fout, scratch[1]);
+    C_ADD(scratch[3], scratch[0], scratch[2]);
+    C_SUB(scratch[4], scratch[0], scratch[2]);
+    C_SUB(Fout[m2], *Fout, scratch[3]);
+    tw1 += fstride;
+    tw2 += fstride * 2;
+    tw3 += fstride * 3;
+    C_ADDTO(*Fout, scratch[3]);
+
+    // outputs m and 3 * m differ only in the sign of the cross terms, which
+    // is swapped between the forward and the inverse transform
+    if (st->inverse) {
+      Fout[m].r = scratch[5].r - scratch[4].i;
+      Fout[m].i = scratch[5].i + scratch[4].r;
+      Fout[m3].r = scratch[5].r + scratch[4].i;
+      Fout[m3].i = scratch[5].i - scratch[4].r;
+    } else {
+      Fout[m].r = scratch[5].r + scratch[4].i;
+      Fout[m].i = scratch[5].i - scratch[4].r;
+      Fout[m3].r = scratch[5].r - scratch[4].i;
+      Fout[m3].i = scratch[5].i + scratch[4].r;
+    }
+    ++Fout;
+  } while (--k);
+}
+
+// Radix-3 butterfly: combines, in place, three groups of m elements located at
+// Fout, Fout + m and Fout + 2 * m.
+//   fstride - base step through st->twiddles; the two twiddle pointers
+//             advance by 1x and 2x this step per iteration
+//   st      - FFT state supplying the twiddle table
+//   m       - number of butterflies; must be >= 1 (do/while on --k)
+// Only the imaginary part of epi3 (the twiddle at index fstride * m) is used.
+// The C_* / HALF_OF operations are macros from _KissFFTGuts.h (not shown
+// here).
+static void kf_bfly3(kiss_fft_cpx *Fout, const size_t fstride,
+                     const kiss_fft_cfg st, size_t m) {
+  size_t k = m;
+  const size_t m2 = 2 * m;
+  kiss_fft_cpx *tw1, *tw2;
+  kiss_fft_cpx scratch[5];
+  kiss_fft_cpx epi3;
+  epi3 = st->twiddles[fstride * m];
+
+  tw1 = tw2 = st->twiddles;
+
+  do {
+    C_FIXDIV(*Fout, 3);
+    C_FIXDIV(Fout[m], 3);
+    C_FIXDIV(Fout[m2], 3);
+
+    // apply the twiddles to inputs 1 and 2
+    C_MUL(scratch[1], Fout[m], *tw1);
+    C_MUL(scratch[2], Fout[m2], *tw2);
+
+    // scratch[3] = sum, scratch[0] = difference of the twiddled inputs
+    C_ADD(scratch[3], scratch[1], scratch[2]);
+    C_SUB(scratch[0], scratch[1], scratch[2]);
+    tw1 += fstride;
+    tw2 += fstride * 2;
+
+    Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+    Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+    C_MULBYSCALAR(scratch[0], epi3.i);
+
+    C_ADDTO(*Fout, scratch[3]);
+
+    Fout[m2].r = Fout[m].r + scratch[0].i;
+    Fout[m2].i = Fout[m].i - scratch[0].r;
+
+    Fout[m].r -= scratch[0].i;
+    Fout[m].i += scratch[0].r;
+
+    ++Fout;
+  } while (--k);
+}
+
+// Radix-5 butterfly: combines, in place, five groups of m elements located at
+// Fout + j * m for j = 0..4.
+//   fstride - base step through st->twiddles; input j of butterfly u is
+//             multiplied by the twiddle at index j * u * fstride
+//   st      - FFT state supplying the twiddle table
+//   m       - number of butterflies (a plain for loop, so m == 0 does nothing)
+// ya and yb are the twiddles at indexes fstride * m and fstride * 2 * m.
+// The C_* / S_MUL operations are macros from _KissFFTGuts.h (not shown here).
+static void kf_bfly5(kiss_fft_cpx *Fout, const size_t fstride,
+                     const kiss_fft_cfg st, int m) {
+  kiss_fft_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
+  int u;
+  kiss_fft_cpx scratch[13];
+  kiss_fft_cpx *twiddles = st->twiddles;
+  kiss_fft_cpx *tw;
+  kiss_fft_cpx ya, yb;
+  ya = twiddles[fstride * m];
+  yb = twiddles[fstride * 2 * m];
+
+  Fout0 = Fout;
+  Fout1 = Fout0 + m;
+  Fout2 = Fout0 + 2 * m;
+  Fout3 = Fout0 + 3 * m;
+  Fout4 = Fout0 + 4 * m;
+
+  tw = st->twiddles;
+  for (u = 0; u < m; ++u) {
+    C_FIXDIV(*Fout0, 5);
+    C_FIXDIV(*Fout1, 5);
+    C_FIXDIV(*Fout2, 5);
+    C_FIXDIV(*Fout3, 5);
+    C_FIXDIV(*Fout4, 5);
+    // keep the untouched first input; *Fout0 is overwritten below
+    scratch[0] = *Fout0;
+
+    // apply the twiddles to inputs 1..4
+    C_MUL(scratch[1], *Fout1, tw[u * fstride]);
+    C_MUL(scratch[2], *Fout2, tw[2 * u * fstride]);
+    C_MUL(scratch[3], *Fout3, tw[3 * u * fstride]);
+    C_MUL(scratch[4], *Fout4, tw[4 * u * fstride]);
+
+    // sums and differences of the pairs (1, 4) and (2, 3)
+    C_ADD(scratch[7], scratch[1], scratch[4]);
+    C_SUB(scratch[10], scratch[1], scratch[4]);
+    C_ADD(scratch[8], scratch[2], scratch[3]);
+    C_SUB(scratch[9], scratch[2], scratch[3]);
+
+    Fout0->r += scratch[7].r + scratch[8].r;
+    Fout0->i += scratch[7].i + scratch[8].i;
+
+    scratch[5].r =
+        scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r);
+    scratch[5].i =
+        scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);
+
+    scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i);
+    scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i);
+
+    // outputs 1 and 4
+    C_SUB(*Fout1, scratch[5], scratch[6]);
+    C_ADD(*Fout4, scratch[5], scratch[6]);
+
+    scratch[11].r =
+        scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r);
+    scratch[11].i =
+        scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
+    scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i);
+    scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i);
+
+    // outputs 2 and 3
+    C_ADD(*Fout2, scratch[11], scratch[12]);
+    C_SUB(*Fout3, scratch[11], scratch[12]);
+
+    ++Fout0;
+    ++Fout1;
+    ++Fout2;
+    ++Fout3;
+    ++Fout4;
+  }
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT */
+// Generic radix-p fallback, used by kf_work() for radices other than 2, 3, 4
+// and 5.
+//   Fout    - the p * m values being combined in place
+//   fstride - twiddle step for this stage
+//   st      - FFT state (twiddle table and total transform length nfft)
+//   m       - number of butterflies
+//   p       - radix of this stage
+// A temporary buffer of p complex values is obtained through
+// KISS_FFT_TMP_ALLOC and released through KISS_FFT_TMP_FREE (macros from
+// _KissFFTGuts.h, not shown here).
+static void kf_bfly_generic(kiss_fft_cpx *Fout, const size_t fstride,
+                            const kiss_fft_cfg st, int m, int p) {
+  int u, k, q1, q;
+  kiss_fft_cpx *twiddles = st->twiddles;
+  kiss_fft_cpx t;
+  int Norig = st->nfft;
+
+  kiss_fft_cpx *scratch =
+      (kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * p);
+
+  for (u = 0; u < m; ++u) {
+    // gather the p inputs of butterfly u (they are m elements apart)
+    k = u;
+    for (q1 = 0; q1 < p; ++q1) {
+      scratch[q1] = Fout[k];
+      C_FIXDIV(scratch[q1], p);
+      k += m;
+    }
+
+    // each output is scratch[0] plus the remaining inputs times a twiddle
+    k = u;
+    for (q1 = 0; q1 < p; ++q1) {
+      int twidx = 0;
+      Fout[k] = scratch[0];
+      for (q = 1; q < p; ++q) {
+        // advance the twiddle index, wrapping once at the table length
+        twidx += static_cast<int>(fstride) * k;
+        if (twidx >= Norig)
+          twidx -= Norig;
+        C_MUL(t, scratch[q], twiddles[twidx]);
+        C_ADDTO(Fout[k], t);
+      }
+      k += m;
+    }
+  }
+  KISS_FFT_TMP_FREE(scratch);
+}
+
+// Recursive worker of the mixed-radix FFT.
+//   Fout      - destination for the p * m outputs of this stage
+//   f         - input samples, read with a step of fstride * in_stride
+//   fstride   - decimation factor accumulated over the enclosing stages
+//   in_stride - element stride of the caller's input array
+//   factors   - remaining (radix, length) pairs as written by kf_factor()
+//   st        - FFT state (twiddles, direction)
+// The stage first fills Fout with p sub-transforms of length m (or copies the
+// inputs directly when m == 1) and then merges them with the butterfly that
+// matches the radix.
+static void kf_work(kiss_fft_cpx *Fout, const kiss_fft_cpx *f,
+                    const size_t fstride, int in_stride, int *factors,
+                    const kiss_fft_cfg st) {
+  kiss_fft_cpx *Fout_beg = Fout;
+  const int p = *factors++; /* the radix  */
+  const int m = *factors++; /* stage's fft length/p */
+  const kiss_fft_cpx *Fout_end = Fout + p * m;
+
+#ifdef _OPENMP
+  // use openmp extensions at the
+  // top-level (not recursive)
+  if (fstride == 1 && p <= 5) {
+    int k;
+
+// execute the p different work units in different threads
+#pragma omp parallel for
+    for (k = 0; k < p; ++k)
+      kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride,
+              factors, st);
+    // all threads have joined by this point
+
+    switch (p) {
+    case 2:
+      kf_bfly2(Fout, fstride, st, m);
+      break;
+    case 3:
+      kf_bfly3(Fout, fstride, st, m);
+      break;
+    case 4:
+      kf_bfly4(Fout, fstride, st, m);
+      break;
+    case 5:
+      kf_bfly5(Fout, fstride, st, m);
+      break;
+    default:
+      kf_bfly_generic(Fout, fstride, st, m, p);
+      break;
+    }
+    return;
+  }
+#endif
+
+  if (m == 1) {
+    // last stage of the recursion: just copy the decimated input samples
+    do {
+      *Fout = *f;
+      f += fstride * in_stride;
+    } while (++Fout != Fout_end);
+  } else {
+    do {
+      // recursive call:
+      // DFT of size m*p performed by doing
+      // p instances of smaller DFTs of size m,
+      // each one takes a decimated version of the input
+      kf_work(Fout, f, fstride * p, in_stride, factors, st);
+      f += fstride * in_stride;
+    } while ((Fout += m) != Fout_end);
+  }
+
+  // Fout was advanced by the loops above; rewind before merging
+  Fout = Fout_beg;
+
+  // recombine the p smaller DFTs
+  switch (p) {
+  case 2:
+    kf_bfly2(Fout, fstride, st, m);
+    break;
+  case 3:
+    kf_bfly3(Fout, fstride, st, m);
+    break;
+  case 4:
+    kf_bfly4(Fout, fstride, st, m);
+    break;
+  case 5:
+    kf_bfly5(Fout, fstride, st, m);
+    break;
+  default:
+    kf_bfly_generic(Fout, fstride, st, m, p);
+    break;
+  }
+}
+
+/*  Factorizes n and writes the result to facbuf as the pairs
+    p1,m1,p2,m2, ...
+    where
+    p[i] * m[i] = m[i-1]
+    m0 = n
+    Candidate radices are tried in the order 4, 2, 3, 5, 7, ...; as soon as a
+    candidate exceeds floor(sqrt(n)) of the ORIGINAL n, whatever is left is
+    taken as the final factor.  */
+static void kf_factor(int n, int *facbuf) {
+  const double sqrt_limit = floor(sqrt((double)n));
+  int radix = 4;
+
+  do {
+    // advance to the next candidate that divides what is left of n
+    while (n % radix) {
+      if (radix == 4)
+        radix = 2;
+      else if (radix == 2)
+        radix = 3;
+      else
+        radix += 2;
+
+      if (radix > sqrt_limit)
+        radix = n; /* no more factors, skip to end */
+    }
+    n /= radix;
+    facbuf[0] = radix;
+    facbuf[1] = n;
+    facbuf += 2;
+  } while (n > 1);
+}
+
+/*
+ *
+ * User-callable function to allocate all necessary storage space for the fft.
+ *
+ * When lenmem is NULL the state is allocated here with `new char[]`, so it
+ * has to be released with `delete[]` on a char pointer (NOT with free()).
+ * When lenmem is non-NULL, mem is used as the storage if it is non-NULL and
+ * *lenmem is large enough; otherwise NULL is returned. In both of those cases
+ * *lenmem is overwritten with the number of bytes required, which allows a
+ * caller to query the size by passing mem == NULL.
+ *
+ * nfft is the transform length; a non-zero inverse_fft selects the inverse
+ * transform (the twiddle phases are negated).
+ * */
+kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void *mem,
+                            size_t *lenmem) {
+  kiss_fft_cfg st = NULL;
+  size_t memneeded = sizeof(struct kiss_fft_state) +
+                     sizeof(kiss_fft_cpx) * (nfft - 1); /* twiddle factors*/
+
+  if (lenmem == NULL) {
+    st = (kiss_fft_cfg) new char[memneeded];
+  } else {
+    if (mem != NULL && *lenmem >= memneeded)
+      st = (kiss_fft_cfg)mem;
+    *lenmem = memneeded;
+  }
+  if (st) {
+    int i;
+    st->nfft = nfft;
+    st->inverse = inverse_fft;
+
+    // twiddle i gets the phase -2*pi*i/nfft (sign flipped for the inverse)
+    for (i = 0; i < nfft; ++i) {
+      const double pi =
+          3.141592653589793238462643383279502884197169399375105820974944;
+      double phase = -2 * pi * i / nfft;
+      if (st->inverse)
+        phase *= -1;
+      kf_cexp(st->twiddles + i, phase);
+    }
+
+    kf_factor(nfft, st->factors);
+  }
+  return st;
+}
+
+// Runs the complex FFT described by st, reading every in_stride-th element of
+// fin and writing st->nfft results to fout.
+void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx *fin,
+                     kiss_fft_cpx *fout, int in_stride) {
+  if (fin != fout) {
+    kf_work(fout, fin, 1, in_stride, st->factors, st);
+    return;
+  }
+
+  // NOTE: this is not really an in-place FFT algorithm.
+  // When input and output are the same array the transform is computed into
+  // a temporary buffer and copied back afterwards.
+  kiss_fft_cpx *scratch =
+      (kiss_fft_cpx *)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * st->nfft);
+  kf_work(scratch, fin, 1, in_stride, st->factors, st);
+  memcpy(fout, scratch, sizeof(kiss_fft_cpx) * st->nfft);
+  KISS_FFT_TMP_FREE(scratch);
+}
+
+// Convenience wrapper: kiss_fft_stride() with an input stride of 1.
+void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout) {
+  kiss_fft_stride(cfg, fin, fout, 1);
+}
+
+// Intentionally empty: this function performs no work.
+void kiss_fft_cleanup(void) {
+  // nothing needed any more
+}
+
+// Returns the smallest size >= n whose only prime factors are 2, 3 and 5.
+//
+// Arguments below 1 are treated as 1. Previously n == 0 never returned,
+// because 0 is divisible by 2 and halving it makes no progress.
+int kiss_fft_next_fast_size(int n) {
+  if (n < 1)
+    n = 1;
+  while (1) {
+    int m = n;
+    while ((m % 2) == 0)
+      m /= 2;
+    while ((m % 3) == 0)
+      m /= 3;
+    while ((m % 5) == 0)
+      m /= 5;
+    if (m <= 1)
+      break; /* n is completely factorable by twos, threes, and fives */
+    n++;
+  }
+  return n;
+}
--- a/Externals/FreeSurround/source/KissFFTR.cpp
+++ b/Externals/FreeSurround/source/KissFFTR.cpp
@ -0,0 +1,185 @@
+/*
+Copyright (c) 2003-2004, Mark Borgerding
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted
+provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+this list of conditions
+and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+this list of
+conditions and the following disclaimer in the documentation and/or other
+materials provided with
+the distribution.
+    * Neither the author nor the names of any contributors may be used to
+endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF
+THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "FreeSurround/KissFFTR.h"
+#include "FreeSurround/_KissFFTGuts.h"
+
+// State of the real-input FFT wrapper. kiss_fftr_alloc() places everything in
+// one allocation: this struct first, then the complex sub-FFT state, then
+// tmpbuf, then super_twiddles.
+struct kiss_fftr_state {
+  kiss_fft_cfg substate;        // complex FFT of half the real length
+  kiss_fft_cpx *tmpbuf;         // scratch spectrum for the half-length FFT
+  kiss_fft_cpx *super_twiddles; // extra twiddles for the real<->complex step
+#ifdef USE_SIMD
+  void *pad;
+#endif
+};
+
+// Allocates and initialises the state for a real-input FFT of length nfft.
+//   nfft        - real transform length; must be even, otherwise an error is
+//                 printed to stderr and NULL is returned
+//   inverse_fft - non-zero to build the state for the inverse transform
+//   mem, lenmem - optional caller-provided storage. With lenmem == NULL the
+//                 state is allocated here with `new char[]`. Otherwise mem is
+//                 used when *lenmem is large enough, and *lenmem is
+//                 overwritten with the number of bytes required.
+// Returns NULL when no storage is available.
+kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void *mem,
+                              size_t *lenmem) {
+  int i;
+  kiss_fftr_cfg st = NULL;
+  size_t subsize = 65536 * 4, memneeded = 0;
+
+  if (nfft & 1) {
+    fprintf(stderr, "Real FFT optimization must be even.\n");
+    return NULL;
+  }
+  // from here on nfft is the length of the complex sub-FFT
+  nfft >>= 1;
+
+  // size query only: with mem == NULL this call just stores the number of
+  // bytes the complex state needs in subsize
+  kiss_fft_alloc(nfft, inverse_fft, NULL, &subsize);
+  memneeded = sizeof(struct kiss_fftr_state) + subsize +
+              sizeof(kiss_fft_cpx) * (nfft * 3 / 2);
+
+  if (lenmem == NULL) {
+    st = (kiss_fftr_cfg) new char[memneeded];
+  } else {
+    if (*lenmem >= memneeded)
+      st = (kiss_fftr_cfg)mem;
+    *lenmem = memneeded;
+  }
+  if (!st)
+    return NULL;
+
+  // carve the single allocation into its parts
+  st->substate = (kiss_fft_cfg)(st + 1); /*just beyond kiss_fftr_state struct */
+  st->tmpbuf = (kiss_fft_cpx *)(((char *)st->substate) + subsize);
+  st->super_twiddles = st->tmpbuf + nfft;
+  // second call initialises the complex state inside the block
+  kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
+
+  // nfft / 2 additional twiddles, used by kiss_fftr() and kiss_fftri()
+  for (i = 0; i < nfft / 2; ++i) {
+    double phase =
+        -3.14159265358979323846264338327 * ((double)(i + 1) / nfft + .5);
+    if (inverse_fft)
+      phase *= -1;
+    kf_cexp(st->super_twiddles + i, phase);
+  }
+  return st;
+}
+
+// Forward real FFT.
+//   st       - state from kiss_fftr_alloc() created with inverse_fft == 0;
+//              passing an inverse state prints an error and calls exit(1)
+//   timedata - real input samples (twice the sub-FFT length)
+//   freqdata - receives bins 0 .. ncfft inclusive, where ncfft is the
+//              sub-FFT length (half the real length)
+void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar *timedata,
+               kiss_fft_cpx *freqdata) {
+  /* input buffer timedata is stored row-wise */
+  int k, ncfft;
+  kiss_fft_cpx fpnk, fpk, f1k, f2k, tw, tdc;
+
+  if (st->substate->inverse) {
+    fprintf(stderr, "kiss fft usage error: improper alloc\n");
+    exit(1);
+  }
+
+  ncfft = st->substate->nfft;
+
+  /*perform the parallel fft of two real signals packed in real,imag*/
+  kiss_fft(st->substate, (const kiss_fft_cpx *)timedata, st->tmpbuf);
+  /* The real part of the DC element of the frequency spectrum in st->tmpbuf
+   * contains the sum of the even-numbered elements of the input time sequence
+   * The imag part is the sum of the odd-numbered elements
+   *
+   * The sum of tdc.r and tdc.i is the sum of the input time sequence.
+   *      yielding DC of input time sequence
+   * The difference of tdc.r - tdc.i is the sum of the input (dot product)
+   * [1,-1,1,-1...
+   *      yielding Nyquist bin of input time sequence
+   */
+
+  tdc.r = st->tmpbuf[0].r;
+  tdc.i = st->tmpbuf[0].i;
+  C_FIXDIV(tdc, 2);
+  CHECK_OVERFLOW_OP(tdc.r, +, tdc.i);
+  CHECK_OVERFLOW_OP(tdc.r, -, tdc.i);
+  freqdata[0].r = tdc.r + tdc.i;
+  freqdata[ncfft].r = tdc.r - tdc.i;
+#ifdef USE_SIMD
+  freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
+#else
+  freqdata[ncfft].i = freqdata[0].i = 0;
+#endif
+
+  // the remaining bins are produced in pairs (k, ncfft - k)
+  for (k = 1; k <= ncfft / 2; ++k) {
+    fpk = st->tmpbuf[k];
+    // conjugate of the mirrored bin
+    fpnk.r = st->tmpbuf[ncfft - k].r;
+    fpnk.i = -st->tmpbuf[ncfft - k].i;
+    C_FIXDIV(fpk, 2);
+    C_FIXDIV(fpnk, 2);
+
+    C_ADD(f1k, fpk, fpnk);
+    C_SUB(f2k, fpk, fpnk);
+    C_MUL(tw, f2k, st->super_twiddles[k - 1]);
+
+    freqdata[k].r = HALF_OF(f1k.r + tw.r);
+    freqdata[k].i = HALF_OF(f1k.i + tw.i);
+    freqdata[ncfft - k].r = HALF_OF(f1k.r - tw.r);
+    freqdata[ncfft - k].i = HALF_OF(tw.i - f1k.i);
+  }
+}
+
+// Inverse real FFT.
+//   st       - state from kiss_fftr_alloc() created with a non-zero
+//              inverse_fft; passing a forward state prints an error and calls
+//              exit(1)
+//   freqdata - input bins 0 .. ncfft inclusive, where ncfft is the sub-FFT
+//              length; only the real parts of bins 0 and ncfft are read
+//   timedata - receives the real output samples
+// No normalisation factor is applied in this function.
+void kiss_fftri(kiss_fftr_cfg st, const kiss_fft_cpx *freqdata,
+                kiss_fft_scalar *timedata) {
+  /* input buffer timedata is stored row-wise */
+  int k, ncfft;
+
+  if (st->substate->inverse == 0) {
+    fprintf(stderr, "kiss fft usage error: improper alloc\n");
+    exit(1);
+  }
+
+  ncfft = st->substate->nfft;
+
+  // pack the real parts of bins 0 and ncfft into element 0
+  st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
+  st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
+  C_FIXDIV(st->tmpbuf[0], 2);
+
+  // rebuild the half-length complex spectrum in pairs (k, ncfft - k)
+  for (k = 1; k <= ncfft / 2; ++k) {
+    kiss_fft_cpx fk, fnkc, fek, fok, tmp;
+    fk = freqdata[k];
+    // conjugate of the mirrored bin
+    fnkc.r = freqdata[ncfft - k].r;
+    fnkc.i = -freqdata[ncfft - k].i;
+    C_FIXDIV(fk, 2);
+    C_FIXDIV(fnkc, 2);
+
+    C_ADD(fek, fk, fnkc);
+    C_SUB(tmp, fk, fnkc);
+    C_MUL(fok, tmp, st->super_twiddles[k - 1]);
+    C_ADD(st->tmpbuf[k], fek, fok);
+    C_SUB(st->tmpbuf[ncfft - k], fek, fok);
+#ifdef USE_SIMD
+    st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
+#else
+    st->tmpbuf[ncfft - k].i *= -1;
+#endif
+  }
+  // the complex output is reinterpreted as the interleaved real samples
+  kiss_fft(st->substate, st->tmpbuf, (kiss_fft_cpx *)timedata);
+}