Sound System Rework: Phase 2

. Performance boost (the sound thread and the CPU thread no longer block each other at all, while remaining thread-safe) . Both 32 kHz and 48 kHz sound are now handled properly (up-sampling is still not implemented, but I don't think any game requires it) . Strategy adjustment for when your PC is *NOT* capable of running the game at 100% speed: >> DSound can sometimes yield more fluent sound than OpenAL, but you will lose the sync between video and audio (audio is played ahead of video to guarantee fluency) >> OpenAL ensures video and audio always stay in sync, but sound may be intermittent (to let the slow video catch up) . Changed the default frame limit to Auto (somehow this dramatically decreases the chance of Wiimote desync in the game NSMB) git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@4724 8ced0084-cf51-0410-be5f-012b33b47a6e
2025-07-23 14:19:46 -06:00 · 2009-12-23 15:34:14 +00:00
parent 0d0a7c515f
commit 9eea60ca69
27 changed files with 358 additions and 314 deletions
--- a/Source/Core/AudioCommon/Src/AudioCommon.h
+++ b/Source/Core/AudioCommon/Src/AudioCommon.h
@ -30,6 +30,31 @@ extern DSPInitialize g_dspInitialize;
 extern SoundStream *soundStream;
 extern AudioCommonConfig ac_Config;

+// UDSPControl
+union UDSPControl
+{
+	u16 Hex;
+	struct
+	{
+		unsigned DSPReset       : 1; // Write 1 to reset and waits for 0
+		unsigned DSPAssertInt   : 1;
+		unsigned DSPHalt        : 1;
+
+		unsigned AI             : 1;
+		unsigned AI_mask        : 1;
+		unsigned ARAM           : 1;
+		unsigned ARAM_mask      : 1;
+		unsigned DSP            : 1;
+		unsigned DSP_mask       : 1;
+
+		unsigned ARAM_DMAState  : 1; // DSPGetDMAStatus() uses this flag
+		unsigned DSPInitCode    : 1;
+		unsigned DSPInit        : 1; // DSPInit() writes to this flag
+		unsigned pad            : 4;
+	};
+	UDSPControl(u16 _Hex = 0) : Hex(_Hex) {}
+};
+
 namespace AudioCommon 
 {
 	SoundStream *InitSoundStream(CMixer *mixer = NULL);
--- a/Source/Core/AudioCommon/Src/DSoundStream.cpp
+++ b/Source/Core/AudioCommon/Src/DSoundStream.cpp
@ -111,13 +111,11 @@ void DSound::SoundLoop()
 		int numBytesToRender = FIX128(ModBufferSize(currentPos - lastPos));
 		if (numBytesToRender >= 256)
 		{
-			if (numBytesToRender > sizeof(realtimeBuffer))
+			if (numBytesToRender > sizeof(realtimeBuffer)) // sizeof() of the array is already its size in bytes; do not scale by sizeof(short)
 				PanicAlert("soundThread: too big render call");
-			m_mixer->Mix(realtimeBuffer, numBytesToRender >> 2);
+			m_mixer->Mix(realtimeBuffer, numBytesToRender / 4); // Mix() takes a frame count: 4 bytes per 16-bit stereo frame
 			WriteDataToBuffer(lastPos, (char*)realtimeBuffer, numBytesToRender);
-			currentPos = ModBufferSize(lastPos + numBytesToRender);
-			totalRenderedBytes += numBytesToRender;
-			lastPos = currentPos;
+			lastPos = ModBufferSize(lastPos + numBytesToRender); // advance the write cursor, wrapping at bufferSize
 		}
 		soundCriticalSection.Leave();
 		soundSyncEvent.Wait();
@ -142,7 +140,6 @@ bool DSound::Start()
 	dsBuffer->Lock(0, bufferSize, (void* *)&p1, &num1, 0, 0, 0);
 	memset(p1, 0, num1);
 	dsBuffer->Unlock(p1, num1, 0, 0);
-	totalRenderedBytes = -bufferSize;
 	thread = new Common::Thread(soundThread, (void *)this);
 	return true;
 }
--- a/Source/Core/AudioCommon/Src/DSoundStream.h
+++ b/Source/Core/AudioCommon/Src/DSoundStream.h
@ -25,8 +25,7 @@
 #include <mmsystem.h>
 #include <dsound.h>

-#define BUFSIZE 32768
-#define MAXWAIT 70   // miliseconds
+#define BUFSIZE (1024 * 8 * 4)
 #endif

 class DSound : public SoundStream
@ -41,31 +40,30 @@ class DSound : public SoundStream
    IDirectSoundBuffer* dsBuffer;
    
    int bufferSize;     //i bytes
-    int totalRenderedBytes;
 	int m_volume;
    
    // playback position
    int currentPos;
    int lastPos;
-    short realtimeBuffer[1024 * 1024];
+    short realtimeBuffer[BUFSIZE / sizeof(short)];
    
-    inline int FIX128(int x) {
+    inline int FIX128(int x)
+	{
 		return x & (~127);
    }

-    inline int ModBufferSize(int x) {
+    inline int ModBufferSize(int x)
+	{
 		return (x + bufferSize) % bufferSize;
    }

    bool CreateBuffer();
-    bool WriteDataToBuffer(DWORD dwOffset, char* soundData,
-			   DWORD dwSoundBytes);
+    bool WriteDataToBuffer(DWORD dwOffset, char* soundData, DWORD dwSoundBytes);

 public:
 	DSound(CMixer *mixer, void *hWnd = NULL)
 		: SoundStream(mixer)
 		, bufferSize(0)
-		, totalRenderedBytes(0)
 		, currentPos(0)
 		, lastPos(0)
 		, dsBuffer(0)
--- a/Source/Core/AudioCommon/Src/Mixer.cpp
+++ b/Source/Core/AudioCommon/Src/Mixer.cpp
@ -16,112 +16,65 @@
 // http://code.google.com/p/dolphin-emu/


-// This queue solution is temporary. I'll implement something more efficient later.
-#include <queue> // System
-
-#include "Thread.h" // Common
+#include "Atomic.h"

 #include "Mixer.h"
-#include "FixedSizeQueue.h"
 #include "AudioCommon.h"

-int CMixer::Mix(short *samples, int numSamples)
+// Executed from sound stream thread
+unsigned int CMixer::Mix(short* samples, unsigned int numSamples)
 {
-	if (! samples) {
-		Premix(NULL, 0);
+	if (!samples)
 		return 0;
-	}
-	// silence
-	memset(samples, 0, numSamples * 2 * sizeof(short));

-	if (g_dspInitialize.pEmulatorState) {
+	if (g_dspInitialize.pEmulatorState)
+	{
 		if (*g_dspInitialize.pEmulatorState != 0)
-			return 0;
-	}
-
-	// first get the DTK Music
-	if (m_EnableDTKMusic) {
-		g_dspInitialize.pGetAudioStreaming(samples, numSamples);
-	}
-
-	Premix(samples, numSamples);
-	
-	int count = 0;
-
-	push_sync.Enter();
-	while (m_queueSize > queue_minlength && count < numSamples * 2) 
-	{
-		int x = samples[count];
-		x += sample_queue.front();
-		if (x > 32767) x = 32767;
-		if (x < -32767) x = -32767;
-		samples[count++] = x;
-		sample_queue.pop();
-		x = samples[count];
-		x += sample_queue.front();
-		if (x > 32767) x = 32767;
-		if (x < -32767) x = -32767;
-		samples[count++] = x;
-		sample_queue.pop();
-		m_queueSize-=2;
-	}
-	push_sync.Leave();
-
-	return count;
-}
-
-
-void CMixer::PushSamples(short *samples, int num_stereo_samples, int core_sample_rate)
-{
-	push_sync.Enter();
-	if (m_queueSize == 0)
-	{
-		m_queueSize = queue_minlength;
-		for (int i = 0; i < queue_minlength; i++)
-			sample_queue.push((s16)0);
-	}
-	push_sync.Leave();
- 
-#ifdef _WIN32
-	if (GetAsyncKeyState(VK_TAB))
-		return;
-#endif
-
-	// Write Other Audio
-	if (!m_throttle)
-		return;	
-	
-	// -----------------------------------------------------------------------	
-	// The auto throttle function. This loop will put a ceiling on the CPU MHz.
-	// ----------------------------
-	/* This is only needed for non-AX sound, currently directly
-	   streamed and DTK sound. For AX we call SoundStream::Update in
-	   AXTask() for example. */
-	while (m_queueSize > queue_maxlength / 2)
-	{
-		// Urgh.
-		if (g_dspInitialize.pEmulatorState) {
-			if (*g_dspInitialize.pEmulatorState != 0) 
-				return;
+		{
+			// Silence
+			memset(samples, 0, numSamples * 4);
+			return numSamples;
 		}
-		soundStream->Update();
-		SLEEP(1);
 	}
-	// -----------------------------------------------------------------------

-	push_sync.Enter();
-	while (num_stereo_samples)
+	unsigned int numLeft = Common::AtomicLoad(m_numSamples);
+	numLeft = (numLeft > numSamples) ? numSamples : numLeft;
+
+	// Do re-sampling if needed
+	if (m_sampleRate == m_dspSampleRate)
 	{
-		sample_queue.push(Common::swap16(*samples));
-		samples++;
-		sample_queue.push(Common::swap16(*samples));
-		samples++;
-		m_queueSize += 2;
-		num_stereo_samples--;
+		for (unsigned int i = 0; i < numLeft * 2; i++)
+			samples[i] = Common::swap16(m_buffer[(m_indexR + i) & INDEX_MASK]);
+		m_indexR += numLeft * 2;
 	}
-	push_sync.Leave();
-	return;
+	else if (m_sampleRate < m_dspSampleRate) // Down-sampling needed: 3 stored frames are reduced to 2 output frames
+	{
+		_dbg_assert_msg_(DSPHLE, !(numSamples % 2), "Number of Samples: %i must be even!", numSamples);

+		short *pDest = samples;
+		int last_l = 0, last_r = 0, cur_l, cur_r; // last_* are first read at i == 1; zero-init only silences warnings
+
+		for (unsigned int i = 0; i < numLeft * 3 / 2; i++) // i indexes stored (input) stereo frames
+		{
+			cur_l = (short)Common::swap16(m_buffer[(m_indexR + i * 2) & INDEX_MASK]);     // cast keeps the sample signed before averaging (assumes swap16 yields an unsigned 16-bit value)
+			cur_r = (short)Common::swap16(m_buffer[(m_indexR + i * 2 + 1) & INDEX_MASK]);
+
+			if (i % 3) // frames 1 and 2 of every group of 3 each emit the average with the previous frame
+			{
+				*pDest++ = (last_l + cur_l) / 2; // left channel must average left samples only
+				*pDest++ = (last_r + cur_r) / 2;
+			}
+
+			last_l = cur_l;
+			last_r = cur_r;
+		}
+
+		m_indexR += numLeft * 2 * 3 / 2; // shorts consumed: 1.5 input frames (2 shorts each) per output frame
+	}
+	else if (m_sampleRate > m_dspSampleRate)
+	{
+		// AyuanX: Up-sampling is not implemented yet
+		PanicAlert("Mixer: Up-sampling is not implemented yet!");
 /*
 	static int PV1l=0,PV2l=0,PV3l=0,PV4l=0;
 	static int PV1r=0,PV2r=0,PV3r=0,PV4r=0;
@ -183,16 +136,93 @@ void CMixer::PushSamples(short *samples, int num_stereo_samples, int core_sample
 		sample_queue.push(r);
 		m_queueSize += 2;
 	}
-	push_sync.Leave();
 */
+	}

+	// Padding
+	if (numSamples > numLeft)
+		memset(&samples[numLeft * 2], 0, (numSamples - numLeft) * 4);
+
+	// Add the HLE sound
+	if (m_sampleRate < m_dspSampleRate)
+	{
+		PanicAlert("Mixer: DSPHLE down-sampling is not implemented yet!\n"
+			"Usually no game should require this, please report!");
+	}
+	else
+	{
+		Premix(samples, numSamples, m_sampleRate);
+	}
+
+	// Add the DTK Music
+	if (m_EnableDTKMusic)
+	{
+		// Re-sampling is done inside
+		g_dspInitialize.pGetAudioStreaming(samples, numSamples, m_sampleRate);
+	}
+
+	Common::AtomicAdd(m_numSamples, -(int)numLeft);
+
+	return numSamples;
 }

-int CMixer::GetNumSamples()
+
+void CMixer::PushSamples(short *samples, unsigned int num_samples, unsigned int sample_rate)
 {
-	return m_queueSize / 2;
-	//int ret = (m_queueSize - queue_minlength) / 2;
-	//ret = (ret > 0) ? ret : 0;
-	//return ret;
+	// Auto throttle: while the buffer is nearly full, stall the calling thread (Update + sleep) until the reader drains it.
+	if (m_throttle)
+	{
+		// AyuanX: Remember to keep at least "num_samples * 1.5" samples of free space,
+		// because the stored data may be re-sampled later (in Mix)
+		while (Common::AtomicLoad(m_numSamples) >= MAX_SAMPLES - RESERVED_SAMPLES)
+		{
+			if (g_dspInitialize.pEmulatorState)
+			{
+				if (*g_dspInitialize.pEmulatorState != 0) 
+					break;
+			}
+			soundStream->Update();
+			SLEEP(1);
+		}
+	}
+
+	// Drop the whole batch if there is not enough free space. NOTE(review): m_numSamples is counted at the output rate while m_buffer holds raw frames - confirm this check is sufficient when down-sampling
+	if (num_samples > MAX_SAMPLES - Common::AtomicLoad(m_numSamples))
+		return;
+
+	// AyuanX: The actual re-sampling work has been moved to the sound thread
+	// to alleviate the workload on the main thread,
+	// so we simply store the raw data here with a fast memory copy (split in two when it wraps past the end of m_buffer)
+	int over_bytes = num_samples * 4 - (MAX_SAMPLES * 2 - (m_indexW & INDEX_MASK)) * sizeof(short);
+	if (over_bytes > 0)
+	{
+		memcpy(&m_buffer[m_indexW & INDEX_MASK], samples, num_samples * 4 - over_bytes);
+		memcpy(&m_buffer[0], samples + (num_samples * 4 - over_bytes) / sizeof(short), over_bytes);
+	}
+	else
+	{
+		memcpy(&m_buffer[m_indexW & INDEX_MASK], samples, num_samples * 4);
+	}
+
+	m_indexW += num_samples * 2;
+
+	if (m_sampleRate < m_dspSampleRate)
+	{
+		// m_numSamples counts output-rate samples: Mix() turns every 3 stored frames into 2, so publish 2/3 of what was stored
+		num_samples = num_samples * 2 / 3;
+	}
+	else if (m_sampleRate > m_dspSampleRate)
+	{
+		PanicAlert("Mixer: Up-sampling is not implemented yet!");
+	}
+
+	Common::AtomicAdd(m_numSamples, num_samples);
+
+	return;
+}
+
+unsigned int CMixer::GetNumSamples()
+{
+	return Common::AtomicLoad(m_numSamples);
 }

--- a/Source/Core/AudioCommon/Src/Mixer.h
+++ b/Source/Core/AudioCommon/Src/Mixer.h
@ -18,39 +18,38 @@
 #ifndef _MIXER_H_
 #define _MIXER_H_

-#include "FixedSizeQueue.h"
-#include "Thread.h"
-
-// On real hardware, this fifo is much, much smaller. But timing is also
-// tighter than under Windows, so...
-#define queue_minlength  1024 * 4
-#define queue_maxlength  1024 * 28
+// 16 bit Stereo
+#define MAX_SAMPLES			(1024 * 4)
+#define INDEX_MASK			(MAX_SAMPLES * 2 - 1)
+#define RESERVED_SAMPLES	(MAX_SAMPLES / 8)

 class CMixer {
 	
 public:
-	// AyuanX: Mixer sample rate is fixed to 32khz for now
-	// if any game sets DSP sample rate to 48khz, we are doomed
-	// TODO: Fix this somehow!
-	CMixer(unsigned int SampleRate = 32000)
-		: m_sampleRate(SampleRate)
+	CMixer(unsigned int AISampleRate = 48000, unsigned int DSPSampleRate = 48000)
+		: m_aiSampleRate(AISampleRate)
+		, m_dspSampleRate(DSPSampleRate)
 		, m_bits(16)
 		, m_channels(2)
-		, m_mode(2)
 		, m_HLEready(false)
-		, m_queueSize(0)
-	{}
+		, m_numSamples(0)
+		, m_indexW(0)
+		, m_indexR(0)
+	{
+		// AyuanX: When sample rate differs, we have to do re-sampling
+		// I perfer speed so let's do down-sampling instead of up-sampling
+		// If you like better sound than speed, feel free to implement the up-sampling code
+		m_sampleRate = (m_aiSampleRate < m_dspSampleRate) ? m_aiSampleRate : m_dspSampleRate;
+	}

 	// Called from audio threads
-	virtual int Mix(short *sample, int numSamples);
-	virtual int GetNumSamples();
+	virtual unsigned int Mix(short* samples, unsigned int numSamples);
+	virtual void Premix(short *samples, unsigned int numSamples, unsigned int sampleRate) {}
+	unsigned int GetNumSamples();

 	// Called from main thread
-	virtual void PushSamples(short* samples, int num_stereo_samples, int core_sample_rate);
-	
-	virtual void Premix(short *samples, int numSamples) {}
-
-	int GetSampleRate() {return m_sampleRate;}
+	virtual void PushSamples(short* samples, unsigned int num_samples, unsigned int sample_rate);
+	unsigned int GetSampleRate() {return m_sampleRate;}

 	void SetThrottle(bool use) { m_throttle = use;}
 	void SetDTKMusic(bool use) { m_EnableDTKMusic = use;}
@ -61,19 +60,23 @@ public:
 	// ---------------------

 protected:
-	int m_sampleRate;
+	unsigned int m_sampleRate;
+	unsigned int m_aiSampleRate;
+	unsigned int m_dspSampleRate;
 	int m_bits;
 	int m_channels;
 	
-	int m_mode;
 	bool m_HLEready;
-	int m_queueSize;

 	bool m_EnableDTKMusic;
 	bool m_throttle;
+
+	short m_buffer[MAX_SAMPLES * 2];
+	u32 m_indexW;
+	u32 m_indexR;
+	volatile u32 m_numSamples;
+
 private:
-	Common::CriticalSection push_sync;
-	FixedSizeQueue<s16, queue_maxlength> sample_queue;

 };

--- a/Source/Core/AudioCommon/Src/NullSoundStream.h
+++ b/Source/Core/AudioCommon/Src/NullSoundStream.h
@ -22,10 +22,10 @@
 #include "Mixer.h"

 class NullMixer : public CMixer {
+
 public:
-	virtual int Mix(short *sample, int numSamples) {return 0;}
-	virtual void PushSamples(short* samples, int num_stereo_samples, 
-							 int core_sample_rate) {}
+	virtual unsigned int Mix(short *samples, unsigned int numSamples) { return 0; }
+	virtual void PushSamples(short* samples, unsigned int num_samples, unsigned int sample_rate) {}
 };

 class NullSound : public SoundStream
@ -35,7 +35,6 @@ public:
 	{
 		delete m_mixer;
 		m_mixer = new NullMixer();
-	   
 	}
    
    virtual ~NullSound() {}
@ -47,7 +46,7 @@ public:
 	virtual bool Start() { return true; }

 	virtual void Update() { 
-		m_mixer->Mix(NULL, 256 >> 2);
+		//m_mixer->Mix(NULL, 256 >> 2);
 		//(*callback)(NULL, 256 >> 2, 16, sampleRate, 2); 
 	}
 };
--- a/Source/Core/AudioCommon/Src/OpenALStream.cpp
+++ b/Source/Core/AudioCommon/Src/OpenALStream.cpp
@ -138,12 +138,13 @@ void OpenALStream::SoundLoop()
 	// Generate a Source to playback the Buffers
 	alGenSources(1, &uiSource);

-	memset(realtimeBuffer, 0, OAL_BUFFER_SIZE);
+	// Short Silence
+	memset(realtimeBuffer, 0, OAL_MAX_SAMPLES * 4);
 	for (int i = 0; i < OAL_NUM_BUFFERS; i++)
-		alBufferData(uiBuffers[i], AL_FORMAT_STEREO16, realtimeBuffer, OAL_BUFFER_SIZE, ulFrequency);
-
+		alBufferData(uiBuffers[i], AL_FORMAT_STEREO16, realtimeBuffer, OAL_MAX_SAMPLES, ulFrequency);
 	alSourceQueueBuffers(uiSource, OAL_NUM_BUFFERS, uiBuffers);
 	alSourcePlay(uiSource);
+
 	err = alGetError();
 	// TODO: Error handling

@ -158,12 +159,12 @@ void OpenALStream::SoundLoop()
 			alGetSourcei(uiSource, AL_BUFFERS_PROCESSED, &iBuffersProcessed);
 			iBuffersFilled = 0;
 		}
-		int numSamples = m_mixer->GetNumSamples();
-		numSamples &= ~0x100;

-		if (iBuffersProcessed && numSamples)
+		unsigned int numSamples = m_mixer->GetNumSamples();
+
+		if (iBuffersProcessed && (numSamples >= OAL_THRESHOLD))
 		{
-			numSamples = (numSamples > OAL_BUFFER_SIZE / 4) ? OAL_BUFFER_SIZE / 4 : numSamples;
+			numSamples = (numSamples > OAL_MAX_SAMPLES) ? OAL_MAX_SAMPLES : numSamples;
 			// Remove the Buffer from the Queue.  (uiBuffer contains the Buffer ID for the unqueued Buffer)
 			if (iBuffersFilled == 0)
 				alSourceUnqueueBuffers(uiSource, iBuffersProcessed, uiBufferTemp);
@ -176,11 +177,11 @@ void OpenALStream::SoundLoop()
 			if (iBuffersFilled == OAL_NUM_BUFFERS)
 				alSourcePlay(uiSource);
 		}
-		else
+		else if (numSamples >= OAL_THRESHOLD)
 		{
 			ALint state = 0;
 			alGetSourcei(uiSource, AL_SOURCE_STATE, &state);
-			if (state != AL_PLAYING)
+			if (state == AL_STOPPED)
 				alSourcePlay(uiSource);
 		}
 		soundSyncEvent.Wait();
--- a/Source/Core/AudioCommon/Src/OpenALStream.h
+++ b/Source/Core/AudioCommon/Src/OpenALStream.h
@ -33,10 +33,11 @@
 #include "AL/al.h"
 #include "AL/alc.h"
 #endif // WIN32
-// public use
+// 16 bit Stereo
 #define SFX_MAX_SOURCE		1
 #define OAL_NUM_BUFFERS		8
-#define OAL_BUFFER_SIZE		(512 * 4)
+#define OAL_MAX_SAMPLES		512		// AyuanX: Don't make it too large, as larger buffer means longer delay
+#define OAL_THRESHOLD		128
 #endif

 class OpenALStream: public SoundStream
@ -66,7 +67,7 @@ private:
 	Common::CriticalSection soundCriticalSection;
 	Common::Event soundSyncEvent;
 	
-	short realtimeBuffer[OAL_BUFFER_SIZE/sizeof(short)];
+	short realtimeBuffer[OAL_MAX_SAMPLES * 2];
 	ALuint uiBuffers[OAL_NUM_BUFFERS];
 	ALuint uiSource;
 	ALfloat fVolume;