Update SoundTouch to 2.3.2 commit 1eda9c0b01039f29d230a46cda9f2290bbd1f62b

2025-07-25 07:09:48 -06:00 · 2023-03-24 16:20:21 -05:00
parent 7de01597c6
commit 4e3a366b2d
30 changed files with 1935 additions and 1713 deletions
--- a/Externals/soundtouch/FIRFilter.cpp
+++ b/Externals/soundtouch/FIRFilter.cpp
@ -2,22 +2,21 @@
 ///
 /// General FIR digital filter routines with MMX optimization. 
 ///
-/// Note : MMX optimized functions reside in a separate, platform-specific file, 
+/// Notes : MMX optimized functions reside in a separate, platform-specific file, 
 /// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
 ///
+/// This source file contains OpenMP optimizations that allow speeding up the
+/// corss-correlation algorithm by executing it in several threads / CPU cores 
+/// in parallel. See the following article link for more detailed discussion 
+/// about SoundTouch OpenMP optimizations:
+/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
+///
 /// Author        : Copyright (c) Olli Parviainen
 /// Author e-mail : oparviai 'at' iki.fi
 /// SoundTouch WWW: http://www.surina.net/soundtouch
 ///
 ////////////////////////////////////////////////////////////////////////////////
 //
-// Last changed  : $Date: 2015-02-21 23:24:29 +0200 (Sat, 21 Feb 2015) $
-// File revision : $Revision: 4 $
-//
-// $Id: FIRFilter.cpp 202 2015-02-21 21:24:29Z oparviai $
-//
-////////////////////////////////////////////////////////////////////////////////
-//
 // License :
 //
 //  SoundTouch audio processing library
@ -60,53 +59,43 @@ FIRFilter::FIRFilter()
    resultDivider = 0;
    length = 0;
    lengthDiv8 = 0;
-    filterCoeffs = NULL;
+    filterCoeffs = nullptr;
+    filterCoeffsStereo = nullptr;
 }


 FIRFilter::~FIRFilter()
 {
    delete[] filterCoeffs;
+    delete[] filterCoeffsStereo;
 }

+
 // Usual C-version of the filter routine for stereo sound
 uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
 {
    int j, end;
-#ifdef SOUNDTOUCH_FLOAT_SAMPLES
-    // when using floating point samples, use a scaler instead of a divider
-    // because division is much slower operation than multiplying.
-    double dScaler = 1.0 / (double)resultDivider;
-#endif
+    // hint compiler autovectorization that loop length is divisible by 8
+    uint ilength = length & -8;

-    assert(length != 0);
-    assert(src != NULL);
-    assert(dest != NULL);
-    assert(filterCoeffs != NULL);
+    assert((length != 0) && (length == ilength) && (src != nullptr) && (dest != nullptr) && (filterCoeffs != nullptr));
+    assert(numSamples > ilength);

-    end = 2 * (numSamples - length);
+    end = 2 * (numSamples - ilength);

    #pragma omp parallel for
    for (j = 0; j < end; j += 2) 
    {
        const SAMPLETYPE *ptr;
        LONG_SAMPLETYPE suml, sumr;
-        uint i;

        suml = sumr = 0;
        ptr = src + j;

-        for (i = 0; i < length; i += 4) 
+        for (uint i = 0; i < ilength; i ++)
        {
-            // loop is unrolled by factor of 4 here for efficiency
-            suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
-                    ptr[2 * i + 2] * filterCoeffs[i + 1] +
-                    ptr[2 * i + 4] * filterCoeffs[i + 2] +
-                    ptr[2 * i + 6] * filterCoeffs[i + 3];
-            sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
-                    ptr[2 * i + 3] * filterCoeffs[i + 1] +
-                    ptr[2 * i + 5] * filterCoeffs[i + 2] +
-                    ptr[2 * i + 7] * filterCoeffs[i + 3];
+            suml += ptr[2 * i] * filterCoeffsStereo[2 * i];
+            sumr += ptr[2 * i + 1] * filterCoeffsStereo[2 * i + 1];
        }

 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
@ -116,54 +105,41 @@ uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, ui
        suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
        // saturate to 16 bit integer limits
        sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
-#else
-        suml *= dScaler;
-        sumr *= dScaler;
 #endif // SOUNDTOUCH_INTEGER_SAMPLES
        dest[j] = (SAMPLETYPE)suml;
        dest[j + 1] = (SAMPLETYPE)sumr;
    }
-    return numSamples - length;
+    return numSamples - ilength;
 }


-
-
 // Usual C-version of the filter routine for mono sound
 uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
 {
    int j, end;
-#ifdef SOUNDTOUCH_FLOAT_SAMPLES
-    // when using floating point samples, use a scaler instead of a divider
-    // because division is much slower operation than multiplying.
-    double dScaler = 1.0 / (double)resultDivider;
-#endif

-    assert(length != 0);
+    // hint compiler autovectorization that loop length is divisible by 8
+    int ilength = length & -8;

-    end = numSamples - length;
+    assert(ilength != 0);
+
+    end = numSamples - ilength;
    #pragma omp parallel for
-    for (j = 0; j < end; j ++) 
+    for (j = 0; j < end; j ++)
    {
        const SAMPLETYPE *pSrc = src + j;
        LONG_SAMPLETYPE sum;
-        uint i;
+        int i;

        sum = 0;
-        for (i = 0; i < length; i += 4) 
+        for (i = 0; i < ilength; i ++)
        {
-            // loop is unrolled by factor of 4 here for efficiency
-            sum += pSrc[i + 0] * filterCoeffs[i + 0] + 
-                   pSrc[i + 1] * filterCoeffs[i + 1] + 
-                   pSrc[i + 2] * filterCoeffs[i + 2] + 
-                   pSrc[i + 3] * filterCoeffs[i + 3];
+            sum += pSrc[i] * filterCoeffs[i];
        }
 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
        sum >>= resultDivFactor;
        // saturate to 16 bit integer limits
        sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
-#else
-        sum *= dScaler;
 #endif // SOUNDTOUCH_INTEGER_SAMPLES
        dest[j] = (SAMPLETYPE)sum;
    }
@ -175,26 +151,24 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
 {
    int j, end;

-#ifdef SOUNDTOUCH_FLOAT_SAMPLES
-    // when using floating point samples, use a scaler instead of a divider
-    // because division is much slower operation than multiplying.
-    double dScaler = 1.0 / (double)resultDivider;
-#endif
-
    assert(length != 0);
-    assert(src != NULL);
-    assert(dest != NULL);
-    assert(filterCoeffs != NULL);
+    assert(src != nullptr);
+    assert(dest != nullptr);
+    assert(filterCoeffs != nullptr);
    assert(numChannels < 16);

-    end = numChannels * (numSamples - length);
+    // hint compiler autovectorization that loop length is divisible by 8
+    int ilength = length & -8;
+
+    end = numChannels * (numSamples - ilength);

    #pragma omp parallel for
    for (j = 0; j < end; j += numChannels)
    {
        const SAMPLETYPE *ptr;
        LONG_SAMPLETYPE sums[16];
-        uint c, i;
+        uint c;
+        int i;

        for (c = 0; c < numChannels; c ++)
        {
@ -203,7 +177,7 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin

        ptr = src + j;

-        for (i = 0; i < length; i ++)
+        for (i = 0; i < ilength; i ++)
        {
            SAMPLETYPE coef=filterCoeffs[i];
            for (c = 0; c < numChannels; c ++)
@ -217,13 +191,11 @@ uint FIRFilter::evaluateFilterMulti(SAMPLETYPE *dest, const SAMPLETYPE *src, uin
        {
 #ifdef SOUNDTOUCH_INTEGER_SAMPLES
            sums[c] >>= resultDivFactor;
-#else
-            sums[c] *= dScaler;
 #endif // SOUNDTOUCH_INTEGER_SAMPLES
            dest[j+c] = (SAMPLETYPE)sums[c];
        }
    }
-    return numSamples - length;
+    return numSamples - ilength;
 }


@ -235,6 +207,13 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u
    assert(newLength > 0);
    if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");

+    #ifdef SOUNDTOUCH_FLOAT_SAMPLES
+        // scale coefficients already here if using floating samples
+        double scale = 1.0 / resultDivider;
+    #else
+        short scale = 1;
+    #endif
+
    lengthDiv8 = newLength / 8;
    length = lengthDiv8 * 8;
    assert(length == newLength);
@ -244,7 +223,16 @@ void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint u

    delete[] filterCoeffs;
    filterCoeffs = new SAMPLETYPE[length];
-    memcpy(filterCoeffs, coeffs, length * sizeof(SAMPLETYPE));
+    delete[] filterCoeffsStereo;
+    filterCoeffsStereo = new SAMPLETYPE[length*2];
+    for (uint i = 0; i < length; i ++)
+    {
+        filterCoeffs[i] = (SAMPLETYPE)(coeffs[i] * scale);
+        // create also stereo set of filter coefficients: this allows compiler
+        // to autovectorize filter evaluation much more efficiently
+        filterCoeffsStereo[2 * i] = (SAMPLETYPE)(coeffs[i] * scale);
+        filterCoeffsStereo[2 * i + 1] = (SAMPLETYPE)(coeffs[i] * scale);
+    }
 }


@ -254,7 +242,6 @@ uint FIRFilter::getLength() const
 }


-
 // Applies the filter to the given sequence of samples. 
 //
 // Note : The amount of outputted samples is by value of 'filter_length' 
@ -284,10 +271,9 @@ uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSample
 }


-
 // Operator 'new' is overloaded so that it automatically creates a suitable instance 
 // depending on if we've a MMX-capable CPU available or not.
-void * FIRFilter::operator new(size_t s)
+void * FIRFilter::operator new(size_t)
 {
    // Notice! don't use "new FIRFilter" directly, use "newInstance" to create a new instance instead!
    ST_THROW_RT_ERROR("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!");