fix AudioCommon::Mixer Buffer indices

This fix the 1h32 audio bug which outputs static sound after 1h32.

The mixer is used for 32->48kHz resampling and as output buffer for the async audio backends.
So this buffer was indiced by a writing and a reading pointer and the count of samples in it.
As this is redundant and the sample count isn't accurate calculateable because of the interpolation,
both indices gets out of sync. So after some time (~92min), this buffer overflows and return only garbage.

thx @ moosehunter + delroth for debugging on this issue. You did the most work :-)
This commit is contained in:
degasus 2013-07-11 21:22:38 +02:00
parent 95d4dc92c1
commit a1822a3aca
2 changed files with 62 additions and 39 deletions

View File

@ -32,7 +32,7 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples)
return numSamples; return numSamples;
} }
unsigned int numLeft = Common::AtomicLoad(m_numSamples); unsigned int numLeft = GetNumSamples();
if (m_AIplaying) { if (m_AIplaying) {
if (numLeft < numSamples)//cannot do much about this if (numLeft < numSamples)//cannot do much about this
m_AIplaying = false; m_AIplaying = false;
@ -43,6 +43,16 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples)
m_AIplaying = true; m_AIplaying = true;
} }
// Cache access in non-volatile variable
// This is the only function changing the read value, so it's safe to
// cache it locally although it's written here.
// The writing pointer will be modified outside, but it will only increase,
// so we will just ignore new written data while interpolating.
// Without this cache, the compiler wouldn't be allowed to optimize the
// interpolation loop.
u32 indexR = Common::AtomicLoad(m_indexR);
u32 indexW = Common::AtomicLoad(m_indexW);
if (m_AIplaying) { if (m_AIplaying) {
numLeft = (numLeft > numSamples) ? numSamples : numLeft; numLeft = (numLeft > numSamples) ? numSamples : numLeft;
@ -57,7 +67,7 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples)
for (unsigned int i = 0; i < numLeft * 2; i += 8) for (unsigned int i = 0; i < numLeft * 2; i += 8)
{ {
_mm_storeu_si128((__m128i *)&samples[i], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&m_buffer[(m_indexR + i) & INDEX_MASK]), sr_mask)); _mm_storeu_si128((__m128i *)&samples[i], _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)&m_buffer[(indexR + i) & INDEX_MASK]), sr_mask));
} }
} }
else else
@ -65,38 +75,38 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples)
{ {
for (unsigned int i = 0; i < numLeft * 2; i+=2) for (unsigned int i = 0; i < numLeft * 2; i+=2)
{ {
samples[i] = Common::swap16(m_buffer[(m_indexR + i + 1) & INDEX_MASK]); samples[i] = Common::swap16(m_buffer[(indexR + i + 1) & INDEX_MASK]);
samples[i+1] = Common::swap16(m_buffer[(m_indexR + i) & INDEX_MASK]); samples[i+1] = Common::swap16(m_buffer[(indexR + i) & INDEX_MASK]);
} }
} }
m_indexR += numLeft * 2; indexR += numLeft * 2;
} }
else //linear interpolation else //linear interpolation
{ {
//render numleft sample pairs to samples[] //render numleft sample pairs to samples[]
//advance m_indexR with sample position //advance indexR with sample position
//remember fractional offset //remember fractional offset
static u32 frac = 0; static u32 frac = 0;
const u32 ratio = (u32)( 65536.0f * (float)AudioInterface::GetAIDSampleRate() / (float)m_sampleRate ); const u32 ratio = (u32)( 65536.0f * (float)AudioInterface::GetAIDSampleRate() / (float)m_sampleRate );
for (u32 i = 0; i < numLeft * 2; i+=2) { for (u32 i = 0; i < numLeft * 2; i+=2) {
u32 m_indexR2 = m_indexR + 2; //next sample u32 indexR2 = indexR + 2; //next sample
if ((m_indexR2 & INDEX_MASK) == (m_indexW & INDEX_MASK)) //..if it exists if ((indexR2 & INDEX_MASK) == (indexW & INDEX_MASK)) //..if it exists
m_indexR2 = m_indexR; indexR2 = indexR;
s16 l1 = Common::swap16(m_buffer[m_indexR & INDEX_MASK]); //current s16 l1 = Common::swap16(m_buffer[indexR & INDEX_MASK]); //current
s16 l2 = Common::swap16(m_buffer[m_indexR2 & INDEX_MASK]); //next s16 l2 = Common::swap16(m_buffer[indexR2 & INDEX_MASK]); //next
int sampleL = ((l1 << 16) + (l2 - l1) * (u16)frac) >> 16; int sampleL = ((l1 << 16) + (l2 - l1) * (u16)frac) >> 16;
samples[i+1] = sampleL; samples[i+1] = sampleL;
s16 r1 = Common::swap16(m_buffer[(m_indexR + 1) & INDEX_MASK]); //current s16 r1 = Common::swap16(m_buffer[(indexR + 1) & INDEX_MASK]); //current
s16 r2 = Common::swap16(m_buffer[(m_indexR2 + 1) & INDEX_MASK]); //next s16 r2 = Common::swap16(m_buffer[(indexR2 + 1) & INDEX_MASK]); //next
int sampleR = ((r1 << 16) + (r2 - r1) * (u16)frac) >> 16; int sampleR = ((r1 << 16) + (r2 - r1) * (u16)frac) >> 16;
samples[i] = sampleR; samples[i] = sampleR;
frac += ratio; frac += ratio;
m_indexR += 2 * (u16)(frac >> 16); indexR += 2 * (u16)(frac >> 16);
frac &= 0xffff; frac &= 0xffff;
} }
} }
@ -109,12 +119,15 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples)
if (numSamples > numLeft) if (numSamples > numLeft)
{ {
unsigned short s[2]; unsigned short s[2];
s[0] = Common::swap16(m_buffer[(m_indexR - 1) & INDEX_MASK]); s[0] = Common::swap16(m_buffer[(indexR - 1) & INDEX_MASK]);
s[1] = Common::swap16(m_buffer[(m_indexR - 2) & INDEX_MASK]); s[1] = Common::swap16(m_buffer[(indexR - 2) & INDEX_MASK]);
for (unsigned int i = numLeft*2; i < numSamples*2; i+=2) for (unsigned int i = numLeft*2; i < numSamples*2; i+=2)
*(u32*)(samples+i) = *(u32*)(s); *(u32*)(samples+i) = *(u32*)(s);
// memset(&samples[numLeft * 2], 0, (numSamples - numLeft) * 4); // memset(&samples[numLeft * 2], 0, (numSamples - numLeft) * 4);
} }
// Flush cached variable
Common::AtomicStore(m_indexR, indexR);
//when logging, also throttle HLE audio //when logging, also throttle HLE audio
if (m_logAudio) { if (m_logAudio) {
@ -135,19 +148,21 @@ unsigned int CMixer::Mix(short* samples, unsigned int numSamples)
AudioInterface::Callback_GetStreaming(samples, numSamples, m_sampleRate); AudioInterface::Callback_GetStreaming(samples, numSamples, m_sampleRate);
} }
Common::AtomicAdd(m_numSamples, -(s32)numLeft);
return numSamples; return numSamples;
} }
void CMixer::PushSamples(const short *samples, unsigned int num_samples) void CMixer::PushSamples(const short *samples, unsigned int num_samples)
{ {
// Cache access in non-volatile variable
// indexR isn't allowed to cache in the audio throttling loop as it
// needs to get updates to not deadlock.
u32 indexW = Common::AtomicLoad(m_indexW);
if (m_throttle) if (m_throttle)
{ {
// The auto throttle function. This loop will put a ceiling on the CPU MHz. // The auto throttle function. This loop will put a ceiling on the CPU MHz.
while (num_samples + Common::AtomicLoad(m_numSamples) > MAX_SAMPLES) while (num_samples * 2 + ((indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK) >= MAX_SAMPLES * 2)
{ {
if (*PowerPC::GetStatePtr() != PowerPC::CPU_RUNNING || soundStream->IsMuted()) if (*PowerPC::GetStatePtr() != PowerPC::CPU_RUNNING || soundStream->IsMuted())
break; break;
@ -160,37 +175,47 @@ void CMixer::PushSamples(const short *samples, unsigned int num_samples)
} }
// Check if we have enough free space // Check if we have enough free space
if (num_samples + Common::AtomicLoad(m_numSamples) > MAX_SAMPLES) // indexW == m_indexR results in empty buffer, so indexR must always be smaller than indexW
if (num_samples * 2 + ((indexW - Common::AtomicLoad(m_indexR)) & INDEX_MASK) >= MAX_SAMPLES * 2)
return; return;
// AyuanX: Actual re-sampling work has been moved to sound thread // AyuanX: Actual re-sampling work has been moved to sound thread
// to alleviate the workload on main thread // to alleviate the workload on main thread
// and we simply store raw data here to make fast mem copy // and we simply store raw data here to make fast mem copy
int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (m_indexW & INDEX_MASK)) * sizeof(short); int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (indexW & INDEX_MASK)) * sizeof(short);
if (over_bytes > 0) if (over_bytes > 0)
{ {
memcpy(&m_buffer[m_indexW & INDEX_MASK], samples, num_samples * 4 - over_bytes); memcpy(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 4 - over_bytes);
memcpy(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes); memcpy(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes);
} }
else else
{ {
memcpy(&m_buffer[m_indexW & INDEX_MASK], samples, num_samples * 4); memcpy(&m_buffer[indexW & INDEX_MASK], samples, num_samples * 4);
} }
m_indexW += num_samples * 2; Common::AtomicAdd(m_indexW, num_samples * 2);
if (AudioInterface::GetAIDSampleRate() == m_sampleRate)
Common::AtomicAdd(m_numSamples, num_samples);
else if ((AudioInterface::GetAIDSampleRate() == 32000) && (m_sampleRate == 48000))
Common::AtomicAdd(m_numSamples, num_samples * 3 / 2);
else
Common::AtomicAdd(m_numSamples, num_samples * 2 / 3);
return; return;
} }
unsigned int CMixer::GetNumSamples() unsigned int CMixer::GetNumSamples()
{ {
return Common::AtomicLoad(m_numSamples); // Guess how many samples would be available after interpolation.
// As interpolation needs at least on sample from the future to
// linear interpolate between them, one sample less is available.
// We also can't say the current interpolation state (specially
// the frac), so to be sure, subtract one again to be sure not
// to underflow the fifo.
u32 numSamples = ((Common::AtomicLoad(m_indexW) - Common::AtomicLoad(m_indexR)) & INDEX_MASK) / 2;
if (AudioInterface::GetAIDSampleRate() == m_sampleRate)
numSamples = numSamples; // 1:1
else if (m_sampleRate == 48000 && AudioInterface::GetAIDSampleRate() == 32000)
numSamples = numSamples * 3 / 2 - 2; // most common case
else
numSamples = numSamples * m_sampleRate / AudioInterface::GetAIDSampleRate() - 2;
return numSamples;
} }

View File

@ -23,7 +23,6 @@ public:
, m_channels(2) , m_channels(2)
, m_HLEready(false) , m_HLEready(false)
, m_logAudio(0) , m_logAudio(0)
, m_numSamples(0)
, m_indexW(0) , m_indexW(0)
, m_indexR(0) , m_indexR(0)
, m_AIplaying(true) , m_AIplaying(true)
@ -97,9 +96,8 @@ protected:
bool m_throttle; bool m_throttle;
short m_buffer[MAX_SAMPLES * 2]; short m_buffer[MAX_SAMPLES * 2];
volatile u32 m_numSamples; volatile u32 m_indexW;
u32 m_indexW; volatile u32 m_indexR;
u32 m_indexR;
bool m_AIplaying; bool m_AIplaying;
std::mutex m_csMixing; std::mutex m_csMixing;