mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-23 06:09:50 -06:00
Update SoundTouch to 2.3.2 commit 1eda9c0b01039f29d230a46cda9f2290bbd1f62b
This commit is contained in:
273
Externals/soundtouch/TDStretch.cpp
vendored
273
Externals/soundtouch/TDStretch.cpp
vendored
@ -1,11 +1,17 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
///
|
||||
/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
|
||||
/// while maintaining the original pitch by using a time domain WSOLA-like
|
||||
/// method with several performance-increasing tweaks.
|
||||
///
|
||||
/// Note : MMX optimized functions reside in a separate, platform-specific
|
||||
/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
|
||||
/// Notes : MMX optimized functions reside in a separate, platform-specific
|
||||
/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'.
|
||||
///
|
||||
/// This source file contains OpenMP optimizations that allow speeding up the
|
||||
/// corss-correlation algorithm by executing it in several threads / CPU cores
|
||||
/// in parallel. See the following article link for more detailed discussion
|
||||
/// about SoundTouch OpenMP optimizations:
|
||||
/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
|
||||
///
|
||||
/// Author : Copyright (c) Olli Parviainen
|
||||
/// Author e-mail : oparviai 'at' iki.fi
|
||||
@ -13,13 +19,6 @@
|
||||
///
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Last changed : $Date: 2015-08-09 00:00:15 +0300 (Sun, 09 Aug 2015) $
|
||||
// File revision : $Revision: 1.12 $
|
||||
//
|
||||
// $Id: TDStretch.cpp 226 2015-08-08 21:00:15Z oparviai $
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// License :
|
||||
//
|
||||
// SoundTouch audio processing library
|
||||
@ -55,26 +54,6 @@ using namespace soundtouch;
|
||||
|
||||
#define max(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* Constant definitions
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
// Table for the hierarchical mixing position seeking algorithm
|
||||
const short _scanOffsets[5][24]={
|
||||
{ 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806,
|
||||
868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0},
|
||||
{-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 121, 114, 97, 114, 98, 105, 108, 32, 104, 99, 117, 111,
|
||||
116, 100, 110, 117, 111, 115, 0, 0, 0, 0, 0, 0}};
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* Implementation of the class 'TDStretch'
|
||||
@ -87,18 +66,13 @@ TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
|
||||
bQuickSeek = false;
|
||||
channels = 2;
|
||||
|
||||
pMidBuffer = NULL;
|
||||
pMidBufferUnaligned = NULL;
|
||||
pMidBuffer = nullptr;
|
||||
pMidBufferUnaligned = nullptr;
|
||||
overlapLength = 0;
|
||||
|
||||
bAutoSeqSetting = true;
|
||||
bAutoSeekSetting = true;
|
||||
|
||||
maxnorm = 0;
|
||||
maxnormf = 1e8;
|
||||
|
||||
skipFract = 0;
|
||||
|
||||
tempo = 1.0f;
|
||||
setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
|
||||
setTempo(1.0f);
|
||||
@ -128,8 +102,13 @@ void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
|
||||
int aSeekWindowMS, int aOverlapMS)
|
||||
{
|
||||
// accept only positive parameter values - if zero or negative, use old values instead
|
||||
if (aSampleRate > 0) this->sampleRate = aSampleRate;
|
||||
if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
|
||||
if (aSampleRate > 0)
|
||||
{
|
||||
if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate");
|
||||
this->sampleRate = aSampleRate;
|
||||
}
|
||||
|
||||
if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
|
||||
|
||||
if (aSequenceMS > 0)
|
||||
{
|
||||
@ -164,7 +143,7 @@ void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
|
||||
|
||||
|
||||
/// Get routine control parameters, see setParameters() function.
|
||||
/// Any of the parameters to this function can be NULL, in such case corresponding parameter
|
||||
/// Any of the parameters to this function can be nullptr, in such case corresponding parameter
|
||||
/// value isn't returned.
|
||||
void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
|
||||
{
|
||||
@ -199,7 +178,7 @@ void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
|
||||
m1 = (SAMPLETYPE)0;
|
||||
m2 = (SAMPLETYPE)overlapLength;
|
||||
|
||||
for (i = 0; i < overlapLength ; i ++)
|
||||
for (i = 0; i < overlapLength ; i ++)
|
||||
{
|
||||
pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
|
||||
m1 += 1;
|
||||
@ -219,6 +198,10 @@ void TDStretch::clearInput()
|
||||
{
|
||||
inputBuffer.clear();
|
||||
clearMidBuffer();
|
||||
isBeginning = true;
|
||||
maxnorm = 0;
|
||||
maxnormf = 1e8;
|
||||
skipFract = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -297,21 +280,23 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
|
||||
int i;
|
||||
double norm;
|
||||
|
||||
bestCorr = FLT_MIN;
|
||||
bestCorr = -FLT_MAX;
|
||||
bestOffs = 0;
|
||||
|
||||
// Scans for the best correlation value by testing each possible position
|
||||
// over the permitted range.
|
||||
bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
|
||||
bestCorr = (bestCorr + 0.1) * 0.75;
|
||||
|
||||
#pragma omp parallel for
|
||||
for (i = 1; i < seekLength; i ++)
|
||||
for (i = 1; i < seekLength; i ++)
|
||||
{
|
||||
double corr;
|
||||
// Calculates correlation value for the mixing position corresponding to 'i'
|
||||
#ifdef _OPENMP
|
||||
#if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)
|
||||
// in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
|
||||
// iterate the loop in sequential order
|
||||
// in SIMD mode, avoid accumulator version to allow avoiding unaligned positions
|
||||
corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
|
||||
#else
|
||||
// In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
|
||||
@ -354,7 +339,7 @@ int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
|
||||
// with improved precision
|
||||
//
|
||||
// Based on testing:
|
||||
// - This algorithm gives on average 99% as good match as the full algorith
|
||||
// - This algorithm gives on average 99% as good match as the full algorithm
|
||||
// - this quick seek algorithm finds the best match on ~90% of cases
|
||||
// - on those 10% of cases when this algorithm doesn't find best match,
|
||||
// it still finds on average ~90% match vs. the best possible match
|
||||
@ -373,12 +358,10 @@ int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
|
||||
|
||||
// note: 'float' types used in this function in case that the platform would need to use software-fp
|
||||
|
||||
bestCorr = FLT_MIN;
|
||||
bestOffs = SCANWIND;
|
||||
bestCorr2 = FLT_MIN;
|
||||
bestOffs2 = 0;
|
||||
|
||||
int best = 0;
|
||||
bestCorr =
|
||||
bestCorr2 = -FLT_MAX;
|
||||
bestOffs =
|
||||
bestOffs2 = SCANWIND;
|
||||
|
||||
// Scans for the best correlation value by testing each possible position
|
||||
// over the permitted range. Look for two best matches on the first pass to
|
||||
@ -436,7 +419,6 @@ int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
|
||||
{
|
||||
bestCorr = corr;
|
||||
bestOffs = i;
|
||||
best = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -458,7 +440,6 @@ int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
|
||||
{
|
||||
bestCorr = corr;
|
||||
bestOffs = i;
|
||||
best = 2;
|
||||
}
|
||||
}
|
||||
|
||||
@ -515,18 +496,18 @@ void TDStretch::clearCrossCorrState()
|
||||
void TDStretch::calcSeqParameters()
|
||||
{
|
||||
// Adjust tempo param according to tempo, so that variating processing sequence length is used
|
||||
// at varius tempo settings, between the given low...top limits
|
||||
// at various tempo settings, between the given low...top limits
|
||||
#define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%)
|
||||
#define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)
|
||||
|
||||
// sequence-ms setting values at above low & top tempo
|
||||
#define AUTOSEQ_AT_MIN 125.0
|
||||
#define AUTOSEQ_AT_MAX 50.0
|
||||
#define AUTOSEQ_AT_MIN 90.0
|
||||
#define AUTOSEQ_AT_MAX 40.0
|
||||
#define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
|
||||
#define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
|
||||
|
||||
// seek-window-ms setting values at above low & top tempoq
|
||||
#define AUTOSEEK_AT_MIN 25.0
|
||||
#define AUTOSEEK_AT_MIN 20.0
|
||||
#define AUTOSEEK_AT_MAX 15.0
|
||||
#define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
|
||||
#define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
|
||||
@ -586,9 +567,8 @@ void TDStretch::setTempo(double newTempo)
|
||||
// Sets the number of channels, 1 = mono, 2 = stereo
|
||||
void TDStretch::setChannels(int numChannels)
|
||||
{
|
||||
assert(numChannels > 0);
|
||||
if (channels == numChannels) return;
|
||||
// assert(numChannels == 1 || numChannels == 2);
|
||||
if (!verifyNumberOfChannels(numChannels) ||
|
||||
(channels == numChannels)) return;
|
||||
|
||||
channels = numChannels;
|
||||
inputBuffer.setChannels(channels);
|
||||
@ -637,7 +617,8 @@ void TDStretch::processNominalTempo()
|
||||
// the result into 'outputBuffer'
|
||||
void TDStretch::processSamples()
|
||||
{
|
||||
int ovlSkip, offset;
|
||||
int ovlSkip;
|
||||
int offset = 0;
|
||||
int temp;
|
||||
|
||||
/* Removed this small optimization - can introduce a click to sound when tempo setting
|
||||
@ -654,35 +635,62 @@ void TDStretch::processSamples()
|
||||
// to form a processing frame.
|
||||
while ((int)inputBuffer.numSamples() >= sampleReq)
|
||||
{
|
||||
// If tempo differs from the normal ('SCALE'), scan for the best overlapping
|
||||
// position
|
||||
offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
|
||||
if (isBeginning == false)
|
||||
{
|
||||
// apart from the very beginning of the track,
|
||||
// scan for the best overlapping position & do overlap-add
|
||||
offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
|
||||
|
||||
// Mix the samples in the 'inputBuffer' at position of 'offset' with the
|
||||
// samples in 'midBuffer' using sliding overlapping
|
||||
// ... first partially overlap with the end of the previous sequence
|
||||
// (that's in 'midBuffer')
|
||||
overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
|
||||
outputBuffer.putSamples((uint)overlapLength);
|
||||
// Mix the samples in the 'inputBuffer' at position of 'offset' with the
|
||||
// samples in 'midBuffer' using sliding overlapping
|
||||
// ... first partially overlap with the end of the previous sequence
|
||||
// (that's in 'midBuffer')
|
||||
overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
|
||||
outputBuffer.putSamples((uint)overlapLength);
|
||||
offset += overlapLength;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Adjust processing offset at beginning of track by not perform initial overlapping
|
||||
// and compensating that in the 'input buffer skip' calculation
|
||||
isBeginning = false;
|
||||
int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5);
|
||||
|
||||
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||
// in SIMD mode, round the skip amount to value corresponding to aligned memory address
|
||||
if (channels == 1)
|
||||
{
|
||||
skip &= -4;
|
||||
}
|
||||
else if (channels == 2)
|
||||
{
|
||||
skip &= -2;
|
||||
}
|
||||
#endif
|
||||
skipFract -= skip;
|
||||
if (skipFract <= -nominalSkip)
|
||||
{
|
||||
skipFract = -nominalSkip;
|
||||
}
|
||||
}
|
||||
|
||||
// ... then copy sequence samples from 'inputBuffer' to output:
|
||||
|
||||
// length of sequence
|
||||
temp = (seekWindowLength - 2 * overlapLength);
|
||||
|
||||
// crosscheck that we don't have buffer overflow...
|
||||
if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
|
||||
if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength))
|
||||
{
|
||||
continue; // just in case, shouldn't really happen
|
||||
}
|
||||
|
||||
outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
|
||||
// length of sequence
|
||||
temp = (seekWindowLength - 2 * overlapLength);
|
||||
outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp);
|
||||
|
||||
// Copies the end of the current sequence from 'inputBuffer' to
|
||||
// 'midBuffer' for being mixed with the beginning of the next
|
||||
// processing sequence and so on
|
||||
assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
|
||||
memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength),
|
||||
assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples());
|
||||
memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp),
|
||||
channels * sizeof(SAMPLETYPE) * overlapLength);
|
||||
|
||||
// Remove the processed samples from the input buffer. Update
|
||||
@ -732,7 +740,7 @@ void TDStretch::acceptNewOverlapLength(int newOverlapLength)
|
||||
|
||||
// Operator 'new' is overloaded so that it automatically creates a suitable instance
|
||||
// depending on if we've a MMX/SSE/etc-capable CPU available or not.
|
||||
void * TDStretch::operator new(size_t s)
|
||||
void * TDStretch::operator new(size_t)
|
||||
{
|
||||
// Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
|
||||
ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
|
||||
@ -776,7 +784,7 @@ TDStretch * TDStretch::newInstance()
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Integer arithmetics specific algorithm implementations.
|
||||
// Integer arithmetic specific algorithm implementations.
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@ -790,7 +798,7 @@ void TDStretch::overlapStereo(short *poutput, const short *input) const
|
||||
short temp;
|
||||
int cnt2;
|
||||
|
||||
for (i = 0; i < overlapLength ; i ++)
|
||||
for (i = 0; i < overlapLength ; i ++)
|
||||
{
|
||||
temp = (short)(overlapLength - i);
|
||||
cnt2 = 2 * i;
|
||||
@ -802,21 +810,19 @@ void TDStretch::overlapStereo(short *poutput, const short *input) const
|
||||
|
||||
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
|
||||
// version of the routine.
|
||||
void TDStretch::overlapMulti(SAMPLETYPE *poutput, const SAMPLETYPE *input) const
|
||||
void TDStretch::overlapMulti(short *poutput, const short *input) const
|
||||
{
|
||||
SAMPLETYPE m1=(SAMPLETYPE)0;
|
||||
SAMPLETYPE m2;
|
||||
int i=0;
|
||||
short m1;
|
||||
int i = 0;
|
||||
|
||||
for (m2 = (SAMPLETYPE)overlapLength; m2; m2 --)
|
||||
for (m1 = 0; m1 < overlapLength; m1 ++)
|
||||
{
|
||||
short m2 = (short)(overlapLength - m1);
|
||||
for (int c = 0; c < channels; c ++)
|
||||
{
|
||||
poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
|
||||
i++;
|
||||
}
|
||||
|
||||
m1++;
|
||||
}
|
||||
}
|
||||
|
||||
@ -861,25 +867,33 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
|
||||
unsigned long lnorm;
|
||||
int i;
|
||||
|
||||
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||
// in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
|
||||
if (((ulongptr)mixingPos) & 15) return -1e50;
|
||||
#endif
|
||||
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = (channels * overlapLength) & -8;
|
||||
|
||||
corr = lnorm = 0;
|
||||
// Same routine for stereo and mono. For stereo, unroll loop for better
|
||||
// efficiency and gives slightly better resolution against rounding.
|
||||
// For mono it same routine, just unrolls loop by factor of 4
|
||||
for (i = 0; i < channels * overlapLength; i += 4)
|
||||
// Same routine for stereo and mono
|
||||
for (i = 0; i < ilength; i += 2)
|
||||
{
|
||||
corr += (mixingPos[i] * compare[i] +
|
||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
|
||||
corr += (mixingPos[i + 2] * compare[i + 2] +
|
||||
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm;
|
||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
||||
lnorm += (mixingPos[i] * mixingPos[i] +
|
||||
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
|
||||
lnorm += (mixingPos[i + 2] * mixingPos[i + 2] +
|
||||
mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBitsNorm;
|
||||
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;
|
||||
// do intermediate scalings to avoid integer overflow
|
||||
}
|
||||
|
||||
if (lnorm > maxnorm)
|
||||
{
|
||||
maxnorm = lnorm;
|
||||
// modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
|
||||
#pragma omp critical
|
||||
if (lnorm > maxnorm)
|
||||
{
|
||||
maxnorm = lnorm;
|
||||
}
|
||||
}
|
||||
// Normalize result by dividing by sqrt(norm) - this step is easiest
|
||||
// done using floating point operation
|
||||
@ -892,9 +906,12 @@ double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, do
|
||||
double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
|
||||
{
|
||||
long corr;
|
||||
unsigned long lnorm;
|
||||
long lnorm;
|
||||
int i;
|
||||
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = (channels * overlapLength) & -8;
|
||||
|
||||
// cancel first normalizer tap from previous round
|
||||
lnorm = 0;
|
||||
for (i = 1; i <= channels; i ++)
|
||||
@ -903,15 +920,11 @@ double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *c
|
||||
}
|
||||
|
||||
corr = 0;
|
||||
// Same routine for stereo and mono. For stereo, unroll loop for better
|
||||
// efficiency and gives slightly better resolution against rounding.
|
||||
// For mono it same routine, just unrolls loop by factor of 4
|
||||
for (i = 0; i < channels * overlapLength; i += 4)
|
||||
// Same routine for stereo and mono.
|
||||
for (i = 0; i < ilength; i += 2)
|
||||
{
|
||||
corr += (mixingPos[i] * compare[i] +
|
||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm; // notice: do intermediate division here to avoid integer overflow
|
||||
corr += (mixingPos[i + 2] * compare[i + 2] +
|
||||
mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBitsNorm;
|
||||
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
||||
}
|
||||
|
||||
// update normalizer with last samples of this round
|
||||
@ -936,7 +949,7 @@ double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *c
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Floating point arithmetics specific algorithm implementations.
|
||||
// Floating point arithmetic specific algorithm implementations.
|
||||
//
|
||||
|
||||
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
|
||||
@ -1012,27 +1025,24 @@ void TDStretch::calculateOverlapLength(int overlapInMsec)
|
||||
/// Calculate cross-correlation
|
||||
double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
|
||||
{
|
||||
double corr;
|
||||
double norm;
|
||||
float corr;
|
||||
float norm;
|
||||
int i;
|
||||
|
||||
#ifdef ST_SIMD_AVOID_UNALIGNED
|
||||
// in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
|
||||
if (((ulongptr)mixingPos) & 15) return -1e50;
|
||||
#endif
|
||||
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = (channels * overlapLength) & -8;
|
||||
|
||||
corr = norm = 0;
|
||||
// Same routine for stereo and mono. For Stereo, unroll by factor of 2.
|
||||
// For mono it's same routine yet unrollsd by factor of 4.
|
||||
for (i = 0; i < channels * overlapLength; i += 4)
|
||||
// Same routine for stereo and mono
|
||||
for (i = 0; i < ilength; i ++)
|
||||
{
|
||||
corr += mixingPos[i] * compare[i] +
|
||||
mixingPos[i + 1] * compare[i + 1];
|
||||
|
||||
norm += mixingPos[i] * mixingPos[i] +
|
||||
mixingPos[i + 1] * mixingPos[i + 1];
|
||||
|
||||
// unroll the loop for better CPU efficiency:
|
||||
corr += mixingPos[i + 2] * compare[i + 2] +
|
||||
mixingPos[i + 3] * compare[i + 3];
|
||||
|
||||
norm += mixingPos[i + 2] * mixingPos[i + 2] +
|
||||
mixingPos[i + 3] * mixingPos[i + 3];
|
||||
corr += mixingPos[i] * compare[i];
|
||||
norm += mixingPos[i] * mixingPos[i];
|
||||
}
|
||||
|
||||
anorm = norm;
|
||||
@ -1043,7 +1053,7 @@ double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, do
|
||||
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
|
||||
double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
|
||||
{
|
||||
double corr;
|
||||
float corr;
|
||||
int i;
|
||||
|
||||
corr = 0;
|
||||
@ -1054,14 +1064,13 @@ double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *c
|
||||
norm -= mixingPos[-i] * mixingPos[-i];
|
||||
}
|
||||
|
||||
// Same routine for stereo and mono. For Stereo, unroll by factor of 2.
|
||||
// For mono it's same routine yet unrollsd by factor of 4.
|
||||
for (i = 0; i < channels * overlapLength; i += 4)
|
||||
// hint compiler autovectorization that loop length is divisible by 8
|
||||
int ilength = (channels * overlapLength) & -8;
|
||||
|
||||
// Same routine for stereo and mono
|
||||
for (i = 0; i < ilength; i ++)
|
||||
{
|
||||
corr += mixingPos[i] * compare[i] +
|
||||
mixingPos[i + 1] * compare[i + 1] +
|
||||
mixingPos[i + 2] * compare[i + 2] +
|
||||
mixingPos[i + 3] * compare[i + 3];
|
||||
corr += mixingPos[i] * compare[i];
|
||||
}
|
||||
|
||||
// update normalizer with last samples of this round
|
||||
|
Reference in New Issue
Block a user