diff --git a/Source/Core/Core/DSP/DSPAccelerator.cpp b/Source/Core/Core/DSP/DSPAccelerator.cpp index 1a8cbf2671..f6822ef2a8 100644 --- a/Source/Core/Core/DSP/DSPAccelerator.cpp +++ b/Source/Core/Core/DSP/DSPAccelerator.cpp @@ -4,77 +4,65 @@ #include "Core/DSP/DSPAccelerator.h" +#include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" #include "Common/MathUtil.h" -#include "Core/DSP/DSPCore.h" -#include "Core/DSP/DSPHWInterface.h" -#include "Core/DSP/DSPHost.h" - namespace DSP { -u16 dsp_read_aram_d3() +u16 Accelerator::ReadD3() { - // Zelda ucode reads ARAM through 0xffd3. - const u32 EndAddress = (g_dsp.ifx_regs[DSP_ACEAH] << 16) | g_dsp.ifx_regs[DSP_ACEAL]; - u32 Address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL]; u16 val = 0; - switch (g_dsp.ifx_regs[DSP_FORMAT]) + switch (m_sample_format) { case 0x5: // u8 reads - val = Host::ReadHostMemory(Address); - Address++; + val = ReadMemory(m_current_address); + m_current_address++; break; case 0x6: // u16 reads - val = (Host::ReadHostMemory(Address * 2) << 8) | Host::ReadHostMemory(Address * 2 + 1); - Address++; + val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1); + m_current_address++; break; default: - ERROR_LOG(DSPLLE, "dsp_read_aram_d3() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]); + ERROR_LOG(DSPLLE, "dsp_read_aram_d3() - unknown format 0x%x", m_sample_format); break; } - if (Address >= EndAddress) + if (m_current_address >= m_end_address) { // Set address back to start address. (never seen this here!) - Address = (g_dsp.ifx_regs[DSP_ACSAH] << 16) | g_dsp.ifx_regs[DSP_ACSAL]; + m_current_address = m_start_address; } - - g_dsp.ifx_regs[DSP_ACCAH] = Address >> 16; - g_dsp.ifx_regs[DSP_ACCAL] = Address & 0xffff; return val; } -void dsp_write_aram_d3(u16 value) +void Accelerator::WriteD3(u16 value) { // Zelda ucode writes a bunch of zeros to ARAM through d3 during // initialization. Don't know if it ever does it later, too. // Pikmin 2 Wii writes non-stop to 0x10008000-0x1000801f (non-zero values too) // Zelda TP Wii writes non-stop to 0x10000000-0x1000001f (non-zero values too) - u32 Address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL]; - switch (g_dsp.ifx_regs[DSP_FORMAT]) + switch (m_sample_format) { case 0xA: // u16 writes - Host::WriteHostMemory(value >> 8, Address * 2); - Host::WriteHostMemory(value & 0xFF, Address * 2 + 1); - Address++; + WriteMemory(m_current_address * 2, value >> 8); + WriteMemory(m_current_address * 2 + 1, value & 0xFF); + m_current_address++; break; default: - ERROR_LOG(DSPLLE, "dsp_write_aram_d3() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]); + ERROR_LOG(DSPLLE, "dsp_write_aram_d3() - unknown format 0x%x", m_sample_format); break; } - - g_dsp.ifx_regs[DSP_ACCAH] = Address >> 16; - g_dsp.ifx_regs[DSP_ACCAL] = Address & 0xffff; } -u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u16 sample_format, - s16* yn1, s16* yn2, u16* pred_scale, s16* coefs, - std::function end_exception) +u16 Accelerator::Read(s16* coefs) { + if (m_reads_stopped) + return 0x0000; + u16 val; u8 step_size_bytes = 0; @@ -84,69 +72,69 @@ u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u1 // extension and do/do not use ADPCM. It also remains to be figured out // whether there's a difference between the usual accelerator "read // address" and 0xd3. - switch (sample_format) + switch (m_sample_format) { case 0x00: // ADPCM audio { - // ADPCM decoding, not much to explain here. - if ((*current_address & 15) == 0) - { - *pred_scale = Host::ReadHostMemory((*current_address & ~15) >> 1); - *current_address += 2; - } - - switch (end_address & 15) - { - case 0: // Tom and Jerry - step_size_bytes = 1; - break; - case 1: // Blazing Angels - step_size_bytes = 0; - break; - default: - step_size_bytes = 2; - break; - } - - int scale = 1 << (*pred_scale & 0xF); - int coef_idx = (*pred_scale >> 4) & 0x7; + int scale = 1 << (m_pred_scale & 0xF); + int coef_idx = (m_pred_scale >> 4) & 0x7; s32 coef1 = coefs[coef_idx * 2 + 0]; s32 coef2 = coefs[coef_idx * 2 + 1]; - int temp = (*current_address & 1) ? (Host::ReadHostMemory(*current_address >> 1) & 0xF) : - (Host::ReadHostMemory(*current_address >> 1) >> 4); + int temp = (m_current_address & 1) ? (ReadMemory(m_current_address >> 1) & 0xF) : + (ReadMemory(m_current_address >> 1) >> 4); if (temp >= 8) temp -= 16; - s32 val32 = (scale * temp) + ((0x400 + coef1 * *yn1 + coef2 * *yn2) >> 11); + s32 val32 = (scale * temp) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11); val = static_cast(MathUtil::Clamp(val32, -0x7FFF, 0x7FFF)); + step_size_bytes = 2; - *yn2 = *yn1; - *yn1 = val; - *current_address += 1; + m_yn2 = m_yn1; + m_yn1 = val; + m_current_address += 1; + + // These two cases are handled in a special way, separate from normal overflow handling: + // the ACCOV exception does not fire at all, the predscale register is not updated, + // and if the end address is 16-byte aligned, the DSP loops to start_address + 1 + // instead of start_address. + if ((m_end_address & 0xf) == 0x0 && m_current_address == m_end_address) + { + m_current_address = m_start_address + 1; + } + else if ((m_end_address & 0xf) == 0x1 && m_current_address == m_end_address - 1) + { + m_current_address = m_start_address; + } + // If any of these special cases were hit, the DSP does not update the predscale register. + else if ((m_current_address & 15) == 0) + { + m_pred_scale = ReadMemory((m_current_address & ~15) >> 1); + m_current_address += 2; + step_size_bytes += 2; + } break; } case 0x0A: // 16-bit PCM audio - val = (Host::ReadHostMemory(*current_address * 2) << 8) | - Host::ReadHostMemory(*current_address * 2 + 1); - *yn2 = *yn1; - *yn1 = val; + val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1); + m_yn2 = m_yn1; + m_yn1 = val; step_size_bytes = 2; - *current_address += 1; + m_current_address += 1; break; case 0x19: // 8-bit PCM audio - val = Host::ReadHostMemory(*current_address) << 8; - *yn2 = *yn1; - *yn1 = val; + val = ReadMemory(m_current_address) << 8; + m_yn2 = m_yn1; + m_yn1 = val; step_size_bytes = 2; - *current_address += 1; + m_current_address += 1; break; default: - ERROR_LOG(DSPLLE, "dsp_read_accelerator() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]); + ERROR_LOG(DSPLLE, "dsp_read_accelerator() - unknown format 0x%x", m_sample_format); step_size_bytes = 2; - *current_address += 1; + m_current_address += 1; val = 0; break; } @@ -160,30 +148,66 @@ u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u1 // Somehow, YN1 and YN2 must be initialized with their "loop" values, // so yeah, it seems likely that we should raise an exception to let // the DSP program do that, at least if DSP_FORMAT == 0x0A. - if (*current_address == (end_address + step_size_bytes - 1)) + if (m_current_address == (m_end_address + step_size_bytes - 1)) { // Set address back to start address. - *current_address = start_address; - end_exception(); + m_current_address = m_start_address; + m_reads_stopped = true; + OnEndException(); } + + SetCurrentAddress(m_current_address); return val; } -u16 dsp_read_accelerator() +void Accelerator::DoState(PointerWrap& p) { - const u32 start_address = (g_dsp.ifx_regs[DSP_ACSAH] << 16) | g_dsp.ifx_regs[DSP_ACSAL]; - const u32 end_address = (g_dsp.ifx_regs[DSP_ACEAH] << 16) | g_dsp.ifx_regs[DSP_ACEAL]; - u32 current_address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL]; + p.Do(m_start_address); + p.Do(m_end_address); + p.Do(m_current_address); + p.Do(m_sample_format); + p.Do(m_yn1); + p.Do(m_yn2); + p.Do(m_pred_scale); + p.Do(m_reads_stopped); +} - auto end_address_reached = [] { DSPCore_SetException(EXP_ACCOV); }; - const u16 val = ReadAccelerator( - start_address, end_address, ¤t_address, g_dsp.ifx_regs[DSP_FORMAT], - reinterpret_cast(&g_dsp.ifx_regs[DSP_YN1]), - reinterpret_cast(&g_dsp.ifx_regs[DSP_YN2]), &g_dsp.ifx_regs[DSP_PRED_SCALE], - reinterpret_cast(&g_dsp.ifx_regs[DSP_COEF_A1_0]), end_address_reached); +constexpr u32 START_END_ADDRESS_MASK = 0x3fffffff; +constexpr u32 CURRENT_ADDRESS_MASK = 0xbfffffff; - gdsp_ifx_write(DSP_ACCAH, current_address >> 16); - gdsp_ifx_write(DSP_ACCAL, current_address & 0xffff); - return val; +void Accelerator::SetStartAddress(u32 address) +{ + m_start_address = address & START_END_ADDRESS_MASK; +} + +void Accelerator::SetEndAddress(u32 address) +{ + m_end_address = address & START_END_ADDRESS_MASK; +} + +void Accelerator::SetCurrentAddress(u32 address) +{ + m_current_address = address & CURRENT_ADDRESS_MASK; +} + +void Accelerator::SetSampleFormat(u16 format) +{ + m_sample_format = format; +} + +void Accelerator::SetYn1(s16 yn1) +{ + m_yn1 = yn1; +} + +void Accelerator::SetYn2(s16 yn2) +{ + m_yn2 = yn2; + m_reads_stopped = false; +} + +void Accelerator::SetPredScale(u16 pred_scale) +{ + m_pred_scale = pred_scale & 0x7f; } } // namespace DSP diff --git a/Source/Core/Core/DSP/DSPAccelerator.h b/Source/Core/Core/DSP/DSPAccelerator.h index 6cb7428b88..3ee0e9f2b0 100644 --- a/Source/Core/Core/DSP/DSPAccelerator.h +++ b/Source/Core/Core/DSP/DSPAccelerator.h @@ -4,18 +4,56 @@ #pragma once -#include - #include "Common/CommonTypes.h" +class PointerWrap; + namespace DSP { -u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u16 sample_format, - s16* yn1, s16* yn2, u16* pred_scale, s16* coefs, - std::function end_exception); +class Accelerator +{ +public: + virtual ~Accelerator() = default; -u16 dsp_read_accelerator(); + u16 Read(s16* coefs); + // Zelda ucode reads ARAM through 0xffd3. + u16 ReadD3(); + void WriteD3(u16 value); -u16 dsp_read_aram_d3(); -void dsp_write_aram_d3(u16 value); + u32 GetStartAddress() const { return m_start_address; } + u32 GetEndAddress() const { return m_end_address; } + u32 GetCurrentAddress() const { return m_current_address; } + u16 GetSampleFormat() const { return m_sample_format; } + s16 GetYn1() const { return m_yn1; } + s16 GetYn2() const { return m_yn2; } + u16 GetPredScale() const { return m_pred_scale; } + void SetStartAddress(u32 address); + void SetEndAddress(u32 address); + void SetCurrentAddress(u32 address); + void SetSampleFormat(u16 format); + void SetYn1(s16 yn1); + void SetYn2(s16 yn2); + void SetPredScale(u16 pred_scale); + + void DoState(PointerWrap& p); + +protected: + virtual void OnEndException() = 0; + virtual u8 ReadMemory(u32 address) = 0; + virtual void WriteMemory(u32 address, u8 value) = 0; + + // DSP accelerator registers. + u32 m_start_address = 0; + u32 m_end_address = 0; + u32 m_current_address = 0; + u16 m_sample_format = 0; + s16 m_yn1 = 0; + s16 m_yn2 = 0; + u16 m_pred_scale = 0; + + // When an ACCOV is triggered, the accelerator stops reading back anything + // and updating the current address register, unless the YN2 register is written to. + // This is kept track of internally; this state is not exposed via any register. + bool m_reads_stopped = false; +}; } // namespace DSP diff --git a/Source/Core/Core/DSP/DSPCore.cpp b/Source/Core/Core/DSP/DSPCore.cpp index aec6b9dd50..3c92a4be54 100644 --- a/Source/Core/Core/DSP/DSPCore.cpp +++ b/Source/Core/Core/DSP/DSPCore.cpp @@ -15,12 +15,14 @@ #include "Common/MemoryUtil.h" #include "Common/MsgHandler.h" +#include "Core/DSP/DSPAccelerator.h" #include "Core/DSP/DSPAnalyzer.h" #include "Core/DSP/DSPHWInterface.h" #include "Core/DSP/DSPHost.h" #include "Core/DSP/Interpreter/DSPIntUtil.h" #include "Core/DSP/Interpreter/DSPInterpreter.h" #include "Core/DSP/Jit/DSPEmitter.h" +#include "Core/HW/DSP.h" namespace DSP { @@ -111,11 +113,21 @@ static void DSPCore_FreeMemoryPages() g_dsp.irom = g_dsp.iram = g_dsp.dram = g_dsp.coef = nullptr; } +class LLEAccelerator final : public Accelerator +{ +protected: + u8 ReadMemory(u32 address) override { return Host::ReadHostMemory(address); } + void WriteMemory(u32 address, u8 value) override { Host::WriteHostMemory(value, address); } + void OnEndException() override { DSPCore_SetException(EXP_ACCOV); } +}; + bool DSPCore_Init(const DSPInitOptions& opts) { g_dsp.step_counter = 0; g_init_hax = false; + g_dsp.accelerator = std::make_unique(); + g_dsp.irom = static_cast(Common::AllocateMemoryPages(DSP_IROM_BYTE_SIZE)); g_dsp.iram = static_cast(Common::AllocateMemoryPages(DSP_IRAM_BYTE_SIZE)); g_dsp.dram = static_cast(Common::AllocateMemoryPages(DSP_DRAM_BYTE_SIZE)); diff --git a/Source/Core/Core/DSP/DSPCore.h b/Source/Core/Core/DSP/DSPCore.h index 5e4798ccdd..953dd210ad 100644 --- a/Source/Core/Core/DSP/DSPCore.h +++ b/Source/Core/Core/DSP/DSPCore.h @@ -16,6 +16,8 @@ namespace DSP { +class Accelerator; + namespace JIT { namespace x86 @@ -299,6 +301,8 @@ struct SDSP // Accelerator / DMA / other hardware registers. Not GPRs. std::array ifx_regs; + std::unique_ptr accelerator; + // When state saving, all of the above can just be memcpy'd into the save state. // The below needs special handling. u16* iram; diff --git a/Source/Core/Core/DSP/DSPHWInterface.cpp b/Source/Core/Core/DSP/DSPHWInterface.cpp index c965233a8e..09a700117c 100644 --- a/Source/Core/Core/DSP/DSPHWInterface.cpp +++ b/Source/Core/Core/DSP/DSPHWInterface.cpp @@ -98,7 +98,7 @@ u16 gdsp_mbox_read_l(Mailbox mbx) return (u16)value; } -void gdsp_ifx_write(u32 addr, u32 val) +void gdsp_ifx_write(u32 addr, u16 val) { g_dsp_cap->LogIFXWrite(addr, val); @@ -136,10 +136,6 @@ void gdsp_ifx_write(u32 addr, u32 val) g_dsp.ifx_regs[DSP_DSBL] = 0; break; - case DSP_ACDATA1: // Accelerator write (Zelda type) - "UnkZelda" - dsp_write_aram_d3(val); - break; - case DSP_GAIN: if (val) { @@ -151,21 +147,45 @@ void gdsp_ifx_write(u32 addr, u32 val) case DSP_DSCR: g_dsp.ifx_regs[addr & 0xFF] = val; break; - /* - case DSP_ACCAL: - dsp_step_accelerator(); - break; - */ - // Masking occurs for the start and end addresses as soon as the registers are written to. case DSP_ACSAH: - case DSP_ACEAH: - g_dsp.ifx_regs[addr & 0xff] = val & 0x3fff; + g_dsp.accelerator->SetStartAddress(val << 16 | + static_cast(g_dsp.accelerator->GetStartAddress())); + break; + case DSP_ACSAL: + g_dsp.accelerator->SetStartAddress( + static_cast(g_dsp.accelerator->GetStartAddress() >> 16) << 16 | val); + break; + case DSP_ACEAH: + g_dsp.accelerator->SetEndAddress(val << 16 | + static_cast(g_dsp.accelerator->GetEndAddress())); + break; + case DSP_ACEAL: + g_dsp.accelerator->SetEndAddress( + static_cast(g_dsp.accelerator->GetEndAddress() >> 16) << 16 | val); break; - - // This also happens for the current address, but with a different mask. case DSP_ACCAH: - g_dsp.ifx_regs[addr & 0xff] = val & 0xbfff; + g_dsp.accelerator->SetCurrentAddress(val << 16 | + static_cast(g_dsp.accelerator->GetCurrentAddress())); + break; + case DSP_ACCAL: + g_dsp.accelerator->SetCurrentAddress( + static_cast(g_dsp.accelerator->GetCurrentAddress() >> 16) << 16 | val); + break; + case DSP_FORMAT: + g_dsp.accelerator->SetSampleFormat(val); + break; + case DSP_YN1: + g_dsp.accelerator->SetYn1(val); + break; + case DSP_YN2: + g_dsp.accelerator->SetYn2(val); + break; + case DSP_PRED_SCALE: + g_dsp.accelerator->SetPredScale(val); + break; + case DSP_ACDATA1: // Accelerator write (Zelda type) - "UnkZelda" + g_dsp.accelerator->WriteD3(val); break; default: @@ -208,11 +228,30 @@ static u16 _gdsp_ifx_read(u16 addr) case DSP_DSCR: return g_dsp.ifx_regs[addr & 0xFF]; + case DSP_ACSAH: + return static_cast(g_dsp.accelerator->GetStartAddress() >> 16); + case DSP_ACSAL: + return static_cast(g_dsp.accelerator->GetStartAddress()); + case DSP_ACEAH: + return static_cast(g_dsp.accelerator->GetEndAddress() >> 16); + case DSP_ACEAL: + return static_cast(g_dsp.accelerator->GetEndAddress()); + case DSP_ACCAH: + return static_cast(g_dsp.accelerator->GetCurrentAddress() >> 16); + case DSP_ACCAL: + return static_cast(g_dsp.accelerator->GetCurrentAddress()); + case DSP_FORMAT: + return g_dsp.accelerator->GetSampleFormat(); + case DSP_YN1: + return g_dsp.accelerator->GetYn1(); + case DSP_YN2: + return g_dsp.accelerator->GetYn2(); + case DSP_PRED_SCALE: + return g_dsp.accelerator->GetPredScale(); case DSP_ACCELERATOR: // ADPCM Accelerator reads - return dsp_read_accelerator(); - + return g_dsp.accelerator->Read(reinterpret_cast(&g_dsp.ifx_regs[DSP_COEF_A1_0])); case DSP_ACDATA1: // Accelerator reads (Zelda type) - "UnkZelda" - return dsp_read_aram_d3(); + return g_dsp.accelerator->ReadD3(); default: if ((addr & 0xff) >= 0xa0) diff --git a/Source/Core/Core/DSP/DSPHWInterface.h b/Source/Core/Core/DSP/DSPHWInterface.h index 329851fb9a..8c68e1c834 100644 --- a/Source/Core/Core/DSP/DSPHWInterface.h +++ b/Source/Core/Core/DSP/DSPHWInterface.h @@ -22,6 +22,6 @@ u16 gdsp_mbox_read_h(Mailbox mbx); u16 gdsp_mbox_read_l(Mailbox mbx); void gdsp_ifx_init(); -void gdsp_ifx_write(u32 addr, u32 val); +void gdsp_ifx_write(u32 addr, u16 val); u16 gdsp_ifx_read(u16 addr); } // namespace DSP diff --git a/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp b/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp index 842362c1fb..476637badb 100644 --- a/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp +++ b/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp @@ -530,9 +530,9 @@ void DSPEmitter::dmem_write(X64Reg value) FixupBranch end = J(true); // else if (saddr == 0xf) SetJumpTarget(ifx); - // Does it mean gdsp_ifx_write needs u32 rather than u16? DSPJitRegCache c(m_gpr); X64Reg abisafereg = m_gpr.MakeABICallSafe(value); + MOVZX(32, 16, abisafereg, R(abisafereg)); m_gpr.PushRegs(); ABI_CallFunctionRR(gdsp_ifx_write, EAX, abisafereg); m_gpr.PopRegs(); diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h index 059ae46ee9..f061848fac 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h @@ -13,6 +13,7 @@ #endif #include +#include #include "Common/CommonTypes.h" #include "Common/MathUtil.h" @@ -163,21 +164,67 @@ void DumpPB(const PB_TYPE& pb) #endif // Simulated accelerator state. -static u32 acc_loop_addr, acc_end_addr; -static u32* acc_cur_addr; static PB_TYPE* acc_pb; static bool acc_end_reached; +class HLEAccelerator final : public Accelerator +{ +protected: + void OnEndException() override + { + if (acc_pb->audio_addr.looping) + { + // Set the ADPCM info to continue processing at loop_addr. + SetPredScale(acc_pb->adpcm_loop_info.pred_scale); + if (!acc_pb->is_stream) + { + SetYn1(acc_pb->adpcm_loop_info.yn1); + SetYn2(acc_pb->adpcm_loop_info.yn2); + } + else + { + // Refresh YN1 and YN2. This indirectly causes the accelerator to resume reads. + SetYn1(GetYn1()); + SetYn2(GetYn2()); +#ifdef AX_GC + // If we're streaming, increment the loop counter. + acc_pb->loop_counter++; +#endif + } + } + else + { + // Non looping voice reached the end -> running = 0. + acc_pb->running = 0; + +#ifdef AX_WII + // One of the few meaningful differences between AXGC and AXWii: + // while AXGC handles non looping voices ending by relying on the + // accelerator to stop reads once the loop address is reached, + // AXWii has the 0000 samples internally in DRAM and use an internal + // pointer to it (loop addr does not contain 0000 samples on AXWii!). + acc_end_reached = true; +#endif + } + } + + u8 ReadMemory(u32 address) override { return ReadARAM(address); } + void WriteMemory(u32 address, u8 value) override { WriteARAM(value, address); } +}; + +static std::unique_ptr s_accelerator = std::make_unique(); + // Sets up the simulated accelerator. -void AcceleratorSetup(PB_TYPE* pb, u32* cur_addr) +void AcceleratorSetup(PB_TYPE* pb) { acc_pb = pb; - // Masking occurs for the start and end addresses as soon as the registers are written to. - acc_loop_addr = HILO_TO_32(pb->audio_addr.loop_addr) & 0x3fffffff; - acc_end_addr = HILO_TO_32(pb->audio_addr.end_addr) & 0x3fffffff; - acc_cur_addr = cur_addr; - // It also happens for the current address, but with a different mask. - *acc_cur_addr &= 0xbfffffff; + s_accelerator->SetStartAddress(HILO_TO_32(pb->audio_addr.loop_addr)); + s_accelerator->SetEndAddress(HILO_TO_32(pb->audio_addr.end_addr)); + s_accelerator->SetCurrentAddress(HILO_TO_32(pb->audio_addr.cur_addr)); + s_accelerator->SetSampleFormat(pb->audio_addr.sample_format); + s_accelerator->SetYn1(pb->adpcm.yn1); + s_accelerator->SetYn2(pb->adpcm.yn2); + s_accelerator->SetPredScale(pb->adpcm.pred_scale); acc_end_reached = false; } @@ -190,50 +237,7 @@ u16 AcceleratorGetSample() if (acc_end_reached) return 0; - auto end_address_reached = [] { - // loop back to loop_addr. - *acc_cur_addr = acc_loop_addr; - - if (acc_pb->audio_addr.looping) - { - // Set the ADPCM info to continue processing at loop_addr. - // - // For some reason, yn1 and yn2 aren't set if the voice is not of - // stream type. This is what the AX UCode does and I don't really - // know why. - acc_pb->adpcm.pred_scale = acc_pb->adpcm_loop_info.pred_scale; - if (!acc_pb->is_stream) - { - acc_pb->adpcm.yn1 = acc_pb->adpcm_loop_info.yn1; - acc_pb->adpcm.yn2 = acc_pb->adpcm_loop_info.yn2; - } -#ifdef AX_GC - else - { - // If we're streaming, increment the loop counter. - acc_pb->loop_counter++; - } -#endif - } - else - { - // Non looping voice reached the end -> running = 0. - acc_pb->running = 0; - -#ifdef AX_WII - // One of the few meaningful differences between AXGC and AXWii: - // while AXGC handles non looping voices ending by having 0000 - // samples at the loop address, AXWii has the 0000 samples - // internally in DRAM and use an internal pointer to it (loop addr - // does not contain 0000 samples on AXWii!). - acc_end_reached = true; -#endif - } - }; - - return ReadAccelerator(acc_loop_addr, acc_end_addr, acc_cur_addr, - acc_pb->audio_addr.sample_format, &acc_pb->adpcm.yn1, &acc_pb->adpcm.yn2, - &acc_pb->adpcm.pred_scale, acc_pb->adpcm.coefs, end_address_reached); + return s_accelerator->Read(acc_pb->adpcm.coefs); } // Reads samples from the input callback, resamples them to samples at @@ -375,8 +379,7 @@ u32 ResampleAudio(std::function input_callback, s16* output, u32 count // if required. void GetInputSamples(PB_TYPE& pb, s16* samples, u16 count, const s16* coeffs) { - u32 cur_addr = HILO_TO_32(pb.audio_addr.cur_addr); - AcceleratorSetup(&pb, &cur_addr); + AcceleratorSetup(&pb); if (coeffs) coeffs += pb.coef_select * 0x200; @@ -385,9 +388,12 @@ void GetInputSamples(PB_TYPE& pb, s16* samples, u16 count, const s16* coeffs) pb.src.cur_addr_frac, HILO_TO_32(pb.src.ratio), pb.src_type, coeffs); pb.src.cur_addr_frac = (curr_pos & 0xFFFF); - // Update current position in the PB. - pb.audio_addr.cur_addr_hi = static_cast(cur_addr >> 16) & 0xbfff; - pb.audio_addr.cur_addr_lo = static_cast(cur_addr); + // Update current position, YN1, YN2 and pred scale in the PB. + pb.audio_addr.cur_addr_hi = static_cast(s_accelerator->GetCurrentAddress() >> 16); + pb.audio_addr.cur_addr_lo = static_cast(s_accelerator->GetCurrentAddress()); + pb.adpcm.yn1 = s_accelerator->GetYn1(); + pb.adpcm.yn2 = s_accelerator->GetYn2(); + pb.adpcm.pred_scale = s_accelerator->GetPredScale(); } // Add samples to an output buffer, with optional volume ramping. diff --git a/Source/Core/Core/HW/DSPLLE/DSPLLE.cpp b/Source/Core/Core/HW/DSPLLE/DSPLLE.cpp index cbbb4de6eb..1a55395525 100644 --- a/Source/Core/Core/HW/DSPLLE/DSPLLE.cpp +++ b/Source/Core/Core/HW/DSPLLE/DSPLLE.cpp @@ -17,6 +17,7 @@ #include "Common/Thread.h" #include "Core/ConfigManager.h" #include "Core/Core.h" +#include "Core/DSP/DSPAccelerator.h" #include "Core/DSP/DSPCaptureLogger.h" #include "Core/DSP/DSPCore.h" #include "Core/DSP/DSPHWInterface.h" @@ -71,6 +72,7 @@ void DSPLLE::DoState(PointerWrap& p) p.Do(g_dsp.step_counter); p.DoArray(g_dsp.ifx_regs); + g_dsp.accelerator->DoState(p); p.Do(g_dsp.mbox[0]); p.Do(g_dsp.mbox[1]); Common::UnWriteProtectMemory(g_dsp.iram, DSP_IRAM_BYTE_SIZE, false); diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index 4c22756e53..b17daba6ca 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -74,7 +74,7 @@ static Common::Event g_compressAndDumpStateSyncEvent; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -static const u32 STATE_VERSION = 89; // Last changed in PR 5890 +static const u32 STATE_VERSION = 90; // Last changed in PR 6077 // Maps savestate versions to Dolphin versions. // Versions after 42 don't need to be added to this list, diff --git a/Source/DSPSpy/main_spy.cpp b/Source/DSPSpy/main_spy.cpp index 10516dc252..5be2598404 100644 --- a/Source/DSPSpy/main_spy.cpp +++ b/Source/DSPSpy/main_spy.cpp @@ -390,6 +390,10 @@ void handle_dsp_mail(void) while (real_dsp.CheckMailTo()) ; } + else if (mail == 0x80050000) + { + CON_PrintRow(4, 25, "ACCOV at step %i", dsp_steps); + } // ROM dumping mails else if (mail == 0x8888c0de) diff --git a/Source/DSPSpy/tests/accelerator_loop_test.ds b/Source/DSPSpy/tests/accelerator_loop_test.ds new file mode 100644 index 0000000000..20e72919f5 --- /dev/null +++ b/Source/DSPSpy/tests/accelerator_loop_test.ds @@ -0,0 +1,64 @@ +incdir "tests" +include "dsp_base.inc" + +; Test parameters +lri $AC0.M, #0x0000 ; start +lri $AC0.L, #0x0000 ; start +lri $AC1.M, #0x0000 ; end +lri $AC1.L, #0x0011 ; end + +; Reset some registers +lri $AC0.H, #0xffff +sr @0xffda, $AC0.H ; pred scale +sr @0xffdb, $AC0.H ; yn1 +sr @0xffdc, $AC0.H ; yn2 + +; Set the sample format +lri $AC0.H, #0x0 +sr @0xffd1, $AC0.H +; Set the starting and current address +srs @ACSAH, $AC0.M +srs @ACCAH, $AC0.M +srs @ACSAL, $AC0.L +srs @ACCAL, $AC0.L +; Set the ending address +srs @ACEAH, $AC1.M +srs @ACEAL, $AC1.L + +call load_hw_reg_to_regs +call send_back ; check the accelerator regs before a read + +bloopi #40, end_of_loop + lr $IX3, @ARAM + call load_hw_reg_to_regs + call send_back ; after a read +end_of_loop: + nop + +jmp end_of_test + +load_hw_reg_to_regs: + lr $AR0, @0xffd1 ; format + lr $AR1, @0xffd2 ; unknown + lr $AR2, @0xffda ; pred scale + lr $AR3, @0xffdb ; yn1 + lr $IX0, @0xffdc ; yn2 + lr $IX1, @0xffdf ; unknown accelerator register + + lri $AC0.H, #0 + lrs $AC0.M, @ACSAH + lrs $AC0.L, @ACSAL + + lri $AC1.H, #0 + lrs $AC1.M, @ACEAH + lrs $AC1.L, @ACEAL + + lrs $AX0.H, @ACCAH + lrs $AX0.L, @ACCAL + lrs $AX1.H, @ACCAH + lrs $AX1.L, @ACCAL + + lrs $AX1.H, @ACCAH + lrs $AX1.L, @ACCAL + + ret diff --git a/Source/DSPSpy/tests/dsp_base.inc b/Source/DSPSpy/tests/dsp_base.inc index 29c992b1c3..7fac883311 100644 --- a/Source/DSPSpy/tests/dsp_base.inc +++ b/Source/DSPSpy/tests/dsp_base.inc @@ -106,7 +106,7 @@ MEM_LO: equ 0x0f7F jmp start_of_test ; This is where we jump when we're done testing, see above. -; We just fall into a loop, playing dead until someone resets the DSP. +; We just fall into a loop, playing dead until someone resets the DSP. end_of_test: nop jmp end_of_test @@ -138,8 +138,18 @@ irq4: lri $ac0.m, #0x0004 jmp irq irq5: - lri $ac0.m, #0x0005 - jmp irq + lrs $ac0.m, @DMBH + andcf $ac0.m, #0x8000 + jlz irq5 + si @DMBH, #0x8005 + si @DMBL, #0x0000 + si @DIRQ, #0x0001 + lri $ac0.m, #0xbbbb + sr @0xffda, $ac0.m ; pred scale + sr @0xffdb, $ac0.m ; yn1 + lr $ix2, @ARAM + sr @0xffdc, $ac0.m ; yn2 + rti irq6: lri $ac0.m, #0x0006 jmp irq @@ -156,7 +166,7 @@ irq: si @DIRQ, #0x0001 halt ; Through some magic this allows us to properly ack the exception in dspspy ;rti ; allow dumping of ucodes which cause exceptions...probably not safe at all - + ; DMA:s the current state of the registers back to the PowerPC. To do this, ; it must write the contents of all regs to DRAM. ; Unfortunately, this loop uses ar0 so it's best to use AR1 and friends for testing @@ -216,13 +226,13 @@ send_back: dma_copy: mrr $ax0.l, $ac1.m -; Wait for the CPU to send us a mail. +; Wait for the CPU to send us a mail. call 0x807e si @DMBH, #0x8888 si @DMBL, #0xfeeb si @DIRQ, #0x0001 - -; wait for the CPU to recieve our response before we execute the next op + +; wait for the CPU to recieve our response before we execute the next op call 0x8078 andi $ac0.m, #0x7fff lrs $ac1.m, @CMBL @@ -261,14 +271,14 @@ dma_copy: lrri $ac0.m, @$ar0 lrri $ac1.m, @$ar0 lr $ar0, @REGS_BASE - + ret ; from send_back - + ; If you are in set40 mode, use this instead of send_back if you want to stay ; in set40 mode. send_back_40: set16 - call send_back + call send_back set40 ret diff --git a/Source/UnitTests/Core/CMakeLists.txt b/Source/UnitTests/Core/CMakeLists.txt index 4df4ee52ff..ba8c228e88 100644 --- a/Source/UnitTests/Core/CMakeLists.txt +++ b/Source/UnitTests/Core/CMakeLists.txt @@ -2,11 +2,12 @@ add_dolphin_test(MMIOTest MMIOTest.cpp) add_dolphin_test(PageFaultTest PageFaultTest.cpp) add_dolphin_test(CoreTimingTest CoreTimingTest.cpp) +add_dolphin_test(DSPAcceleratorTest DSP/DSPAcceleratorTest.cpp) add_dolphin_test(DSPAssemblyTest DSP/DSPAssemblyTest.cpp DSP/DSPTestBinary.cpp DSP/DSPTestText.cpp DSP/HermesBinary.cpp -) +) add_dolphin_test(ESFormatsTest IOS/ES/FormatsTest.cpp IOS/ES/TestBinaryData.cpp) diff --git a/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp b/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp new file mode 100644 index 0000000000..5cebc103a8 --- /dev/null +++ b/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp @@ -0,0 +1,187 @@ +// Copyright 2017 Dolphin Emulator Project +// Licensed under GPLv2+ +// Refer to the license.txt file included. + +#include + +#include + +#include "Common/CommonTypes.h" +#include "Core/DSP/DSPAccelerator.h" + +// Simulated DSP accelerator. +class TestAccelerator : public DSP::Accelerator +{ +public: + // For convenience. + u16 TestRead() + { + std::array coefs{}; + m_accov_raised = false; + return Read(coefs.data()); + } + + bool EndExceptionRaised() const { return m_accov_raised; } +protected: + void OnEndException() override + { + EXPECT_TRUE(m_reads_stopped); + m_accov_raised = true; + } + u8 ReadMemory(u32 address) override { return 0; } + void WriteMemory(u32 address, u8 value) override {} + bool m_accov_raised = false; +}; + +TEST(DSPAccelerator, Initialization) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00001000); + EXPECT_EQ(accelerator.GetStartAddress(), 0x00000000u); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x00000000u); + EXPECT_EQ(accelerator.GetEndAddress(), 0x00001000u); +} + +TEST(DSPAccelerator, SimpleReads) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00001000); + + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } +} + +TEST(DSPAccelerator, AddressMasking) +{ + TestAccelerator accelerator; + + accelerator.SetCurrentAddress(0x48000000); + accelerator.SetStartAddress(0x48000000); + accelerator.SetEndAddress(0x48001000); + EXPECT_EQ(accelerator.GetStartAddress(), 0x08000000u); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x08000000u); + EXPECT_EQ(accelerator.GetEndAddress(), 0x08001000u); + + accelerator.SetCurrentAddress(0xffffffff); + accelerator.SetStartAddress(0xffffffff); + accelerator.SetEndAddress(0xffffffff); + EXPECT_EQ(accelerator.GetStartAddress(), 0x3fffffffu); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0xbfffffffu); + EXPECT_EQ(accelerator.GetEndAddress(), 0x3fffffffu); +} + +TEST(DSPAccelerator, PredScaleRegisterMasking) +{ + TestAccelerator accelerator; + + accelerator.SetPredScale(0xbbbb); + EXPECT_EQ(accelerator.GetPredScale(), 0x3bu); + accelerator.SetPredScale(0xcccc); + EXPECT_EQ(accelerator.GetPredScale(), 0x4cu); + accelerator.SetPredScale(0xffff); + EXPECT_EQ(accelerator.GetPredScale(), 0x7fu); +} + +TEST(DSPAccelerator, OverflowBehaviour) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x0000000f); + + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } + + accelerator.TestRead(); + EXPECT_TRUE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress()); + + // Since an ACCOV has fired, reads are stopped (until the YN2 register is reset), + // so the current address shouldn't be updated for this read. + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress()); + + // Simulate a write to YN2, which internally resets the "reads stopped" flag. + // After resetting it, reads should work once again. + accelerator.SetYn2(0); + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } +} + +TEST(DSPAccelerator, OverflowFor16ByteAlignedAddresses) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00000010); + + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } + + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + 1); + + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + 2); +} + +TEST(DSPAccelerator, OverflowForXXXXXXX1Addresses) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00000011); + + for (size_t i = 1; i <= 0xf; ++i) + { + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + i); + } + + accelerator.TestRead(); + EXPECT_FALSE(accelerator.EndExceptionRaised()); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress()); + + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), accelerator.GetStartAddress() + 1); +} + +TEST(DSPAccelerator, CurrentAddressSkips) +{ + TestAccelerator accelerator; + accelerator.SetCurrentAddress(0x00000000); + accelerator.SetStartAddress(0x00000000); + accelerator.SetEndAddress(0x00001000); + + for (size_t j = 1; j <= 0xf; ++j) + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x0000000fu); + + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x00000012u); + + accelerator.TestRead(); + EXPECT_EQ(accelerator.GetCurrentAddress(), 0x00000013u); +}