From 6eaa4a4e890972704226098bf995e936454abd51 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Mon, 27 Jun 2022 12:46:35 -0700 Subject: [PATCH] DSPHLE: Add HLE version of libaesnd ucode --- Source/Core/Core/CMakeLists.txt | 2 + Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp | 403 +++++++++++++++++++ Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h | 74 ++++ Source/Core/Core/HW/DSPHLE/UCodes/UCodes.cpp | 7 + Source/Core/DolphinLib.props | 2 + 5 files changed, 488 insertions(+) create mode 100644 Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp create mode 100644 Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 99a143f2bd..745bf250c1 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -144,6 +144,8 @@ add_library(core HW/DSPHLE/MailHandler.h HW/DSPHLE/UCodes/ASnd.cpp HW/DSPHLE/UCodes/ASnd.h + HW/DSPHLE/UCodes/AESnd.cpp + HW/DSPHLE/UCodes/AESnd.h HW/DSPHLE/UCodes/AX.cpp HW/DSPHLE/UCodes/AX.h HW/DSPHLE/UCodes/AXStructs.h diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp new file mode 100644 index 0000000000..34c2308468 --- /dev/null +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp @@ -0,0 +1,403 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +// High-level emulation for the libaesnd ucode, used by homebrew +// libaesnd is part of devkitPro's libogc, released under the Zlib license + +#include "Core/HW/DSPHLE/UCodes/AESnd.h" + +#include + +#include "Common/ChunkFile.h" +#include "Common/Logging/Log.h" +#include "Common/Swap.h" +#include "Core/DSP/DSPAccelerator.h" +#include "Core/HW/DSP.h" +#include "Core/HW/DSPHLE/DSPHLE.h" +#include "Core/HW/DSPHLE/MailHandler.h" +#include "Core/HW/DSPHLE/UCodes/UCodes.h" + +namespace DSP::HLE +{ +constexpr u32 MAIL_PREFIX = 0xface'0000; +constexpr u32 MAIL_PROCESS_FIRST_VOICE = MAIL_PREFIX | 0x0010; +constexpr u32 MAIL_PROCESS_NEXT_VOICE = MAIL_PREFIX | 0x0020; +constexpr u32 MAIL_GET_PB_ADDRESS = MAIL_PREFIX | 0x0080; +constexpr u32 MAIL_SEND_SAMPLES = MAIL_PREFIX | 0x0100; +constexpr u32 MAIL_TERMINATE = MAIL_PREFIX | 0xdead; + +// June 5, 2010 version (padded to 0x03e0 bytes) - initial release +// First included with libogc 1.8.4 on October 3, 2010: https://devkitpro.org/viewtopic.php?t=2249 +// https://github.com/devkitPro/libogc/blob/b5fdbdb069c45584aa4dfd950a136a8db9b1144c/libaesnd/dspcode/dspmixer.s +constexpr u32 HASH_2010 = 0x008366af; +// April 11, 2012 version (padded to 0x03e0 bytes) - swapped input channels +// First included with libogc 1.8.11 on April 22, 2012: https://devkitpro.org/viewtopic.php?t=3094 +// https://github.com/devkitPro/libogc/commit/8f188e12b6a3d8b5a0d49a109fe6a3e4e1702aab +constexpr u32 HASH_2012 = 0x078066ab; +// June 14, 2020 version (0x03e6 bytes) - added unsigned formats +// First included with libogc 2.1.0 on June 15, 2020: https://devkitpro.org/viewtopic.php?t=9079 +// https://github.com/devkitPro/libogc/commit/eac8fe2c29aa790d552dd6166a1fb195dfdcb825 +constexpr u32 HASH_2020 = 0x84c680a9; + +constexpr u32 VOICE_MONO8 = 0x00000000; +constexpr u32 VOICE_STEREO8 = 0x00000001; +constexpr u32 VOICE_MONO16 = 0x00000002; +constexpr u32 VOICE_STEREO16 = 0x00000003; +// These are only present in the 2020 release +constexpr u32 VOICE_MONO8_UNSIGNED = 0x00000004; +constexpr u32 VOICE_STEREO8_UNSIGNED = 0x00000005; +constexpr u32 VOICE_MONO16_UNSIGNED = 0x00000006; +constexpr u32 VOICE_STEREO16_UNSIGNED = 0x00000007; + +constexpr u32 VOICE_FORMAT_MASK_OLD = 3; +constexpr u32 VOICE_FORMAT_MASK_NEW = 7; +// Note that the above formats have the 2-bit set for 16-bit formats and not set for 8-bit formats +constexpr u32 VOICE_16_BIT_FLAG = 2; + +// These are used in the pre-2020 versions version +constexpr u32 VOICE_PAUSE_OLD = 0x00000004; +constexpr u32 VOICE_LOOP_OLD = 0x00000008; // not used by the DSP +constexpr u32 VOICE_ONCE_OLD = 0x00000010; // not used by the DSP +constexpr u32 VOICE_STREAM_OLD = 0x00000020; // not used by the DSP + +// These were changed in the 2020 version to account for the different flags +constexpr u32 VOICE_PAUSE_NEW = 0x00000008; +constexpr u32 VOICE_LOOP_NEW = 0x00000010; // not used by the DSP +constexpr u32 VOICE_ONCE_NEW = 0x00000020; // not used by the DSP +constexpr u32 VOICE_STREAM_NEW = 0x00000040; // not used by the DSP + +// These did not change between versions +constexpr u32 VOICE_FINISHED = 0x00100000; +constexpr u32 VOICE_STOPPED = 0x00200000; // not used by the DSP +constexpr u32 VOICE_RUNNING = 0x40000000; +constexpr u32 VOICE_USED = 0x80000000; // not used by the DSP + +// 1<<4 = scale gain by 1/1, 2<<2 = PCM decoding from ARAM, 1<<0 = 8-bit reads +constexpr u32 ACCELERATOR_FORMAT_8_BIT = 0x0019; +// 0<<4 = scale gain by 1/2048, 2<<2 = PCM decoding from ARAM, 2<<0 = 16-bit reads +constexpr u32 ACCELERATOR_FORMAT_16_BIT = 0x000a; +// Multiply samples by 0x100/1 = 0x100 (for ACCELERATOR_FORMAT_8_BIT) +constexpr u32 ACCELERATOR_GAIN_8_BIT = 0x0100; +// Multiply samples by 0x800/2048 = 1 (for ACCELERATOR_FORMAT_16_BIT) +constexpr u32 ACCELERATOR_GAIN_16_BIT = 0x0800; + +AESndUCode::AESndUCode(DSPHLE* dsphle, u32 crc) : UCodeInterface(dsphle, crc) +{ +} + +void AESndUCode::Initialize() +{ + m_mail_handler.PushMail(DSP_INIT); +} + +void AESndUCode::Update() +{ + // This is dubious in general, since we set the interrupt parameter on m_mail_handler.PushMail + if (m_mail_handler.HasPending()) + { + DSP::GenerateDSPInterruptFromDSPEmu(DSP::INT_DSP); + } +} + +void AESndUCode::HandleMail(u32 mail) +{ + if (m_upload_setup_in_progress) + { + PrepareBootUCode(mail); + } + else if (m_next_mail_is_parameter_block_addr) + { + // get_pb_address + m_parameter_block_addr = mail; + INFO_LOG_FMT(DSPHLE, "AESndUCode - Parameter block is at {:08x}", mail); + m_next_mail_is_parameter_block_addr = false; + // No mail is sent in response + } + else if ((mail & TASK_MAIL_MASK) == TASK_MAIL_TO_DSP) + { + switch (mail) + { + case MAIL_NEW_UCODE: + m_upload_setup_in_progress = true; + break; + case MAIL_RESET: + m_dsphle->SetUCode(UCODE_ROM); + break; + default: + WARN_LOG_FMT(DSPHLE, "AESndUCode - unknown 0xcdd1 command: {:08x}", mail); + break; + } + // No mail is sent in response to any of these. + // The actual uCode halts on unknown cdd1 commands, which I have not implemented. + } + else + { + // The uCode checks for a 0xface prefix, which we include in the constants above. + switch (mail) + { + case MAIL_PROCESS_FIRST_VOICE: + DEBUG_LOG_FMT(DSPHLE, "ASndUCode - MAIL_PROCESS_FIRST_VOICE"); + DMAInParameterBlock(); // dma_pb_block + m_output_buffer.fill(0); + DoMixing(); // fall through to dsp_mixer + // Mail is handled by DoMixing() + break; + case MAIL_PROCESS_NEXT_VOICE: + DEBUG_LOG_FMT(DSPHLE, "ASndUCode - MAIL_PROCESS_NEXT_VOICE"); + DMAInParameterBlock(); // dma_pb_block + DoMixing(); // jump to dsp_mixer + // Mail is handled by DoMixing() + break; + case MAIL_GET_PB_ADDRESS: + DEBUG_LOG_FMT(DSPHLE, "ASndUCode - MAIL_GET_PB_ADDRESS"); + m_next_mail_is_parameter_block_addr = true; + // No mail is sent in response + break; + case MAIL_SEND_SAMPLES: + DEBUG_LOG_FMT(DSPHLE, "ASndUCode - MAIL_SEND_SAMPLES"); + // send_samples + for (u32 i = 0; i < NUM_OUTPUT_SAMPLES * 2; i++) + { + HLEMemory_Write_U16(m_parameter_block.out_buf + i * sizeof(u16), m_output_buffer[i]); + } + m_mail_handler.PushMail(DSP_SYNC, true); + break; + case MAIL_TERMINATE: + INFO_LOG_FMT(DSPHLE, "ASndUCode - MAIL_TERMINATE: {:08x}", mail); + // This doesn't actually change the state of the system. + m_mail_handler.PushMail(DSP_DONE, true); + break; + default: + WARN_LOG_FMT(DSPHLE, "AESndUCode - unknown command: {:08x}", mail); + // No mail is sent in this case + break; + } + } +} + +void AESndUCode::DMAInParameterBlock() +{ + m_parameter_block.out_buf = HLEMemory_Read_U32(m_parameter_block_addr + 0); + m_parameter_block.buf_start = HLEMemory_Read_U32(m_parameter_block_addr + 4); + m_parameter_block.buf_end = HLEMemory_Read_U32(m_parameter_block_addr + 8); + m_parameter_block.buf_curr = HLEMemory_Read_U32(m_parameter_block_addr + 12); + m_parameter_block.yn1 = HLEMemory_Read_U16(m_parameter_block_addr + 16); + m_parameter_block.yn2 = HLEMemory_Read_U16(m_parameter_block_addr + 18); + m_parameter_block.pds = HLEMemory_Read_U16(m_parameter_block_addr + 20); + m_parameter_block.freq = HLEMemory_Read_U32(m_parameter_block_addr + 22); + m_parameter_block.counter = HLEMemory_Read_U16(m_parameter_block_addr + 26); + m_parameter_block.left = HLEMemory_Read_U16(m_parameter_block_addr + 28); + m_parameter_block.right = HLEMemory_Read_U16(m_parameter_block_addr + 30); + m_parameter_block.volume_l = HLEMemory_Read_U16(m_parameter_block_addr + 32); + m_parameter_block.volume_r = HLEMemory_Read_U16(m_parameter_block_addr + 34); + m_parameter_block.delay = HLEMemory_Read_U32(m_parameter_block_addr + 36); + m_parameter_block.flags = HLEMemory_Read_U32(m_parameter_block_addr + 40); +} + +void AESndUCode::DMAOutParameterBlock() +{ + HLEMemory_Write_U32(m_parameter_block_addr + 0, m_parameter_block.out_buf); + HLEMemory_Write_U32(m_parameter_block_addr + 4, m_parameter_block.buf_start); + HLEMemory_Write_U32(m_parameter_block_addr + 8, m_parameter_block.buf_end); + HLEMemory_Write_U32(m_parameter_block_addr + 12, m_parameter_block.buf_curr); + HLEMemory_Write_U16(m_parameter_block_addr + 16, m_parameter_block.yn1); + HLEMemory_Write_U16(m_parameter_block_addr + 18, m_parameter_block.yn2); + HLEMemory_Write_U16(m_parameter_block_addr + 20, m_parameter_block.pds); + HLEMemory_Write_U32(m_parameter_block_addr + 22, m_parameter_block.freq); + HLEMemory_Write_U16(m_parameter_block_addr + 26, m_parameter_block.counter); + HLEMemory_Write_U16(m_parameter_block_addr + 28, m_parameter_block.left); + HLEMemory_Write_U16(m_parameter_block_addr + 30, m_parameter_block.right); + HLEMemory_Write_U16(m_parameter_block_addr + 32, m_parameter_block.volume_l); + HLEMemory_Write_U16(m_parameter_block_addr + 34, m_parameter_block.volume_r); + HLEMemory_Write_U32(m_parameter_block_addr + 36, m_parameter_block.delay); + HLEMemory_Write_U32(m_parameter_block_addr + 40, m_parameter_block.flags); +} + +class AESndAccelerator final : public Accelerator +{ +protected: + void OnEndException() override + { + // exception5 - this updates internal state + SetYn1(GetYn1()); + SetYn2(GetYn2()); + SetPredScale(GetPredScale()); + } + + u8 ReadMemory(u32 address) override { return ReadARAM(address); } + void WriteMemory(u32 address, u8 value) override { WriteARAM(value, address); } +}; + +static std::unique_ptr s_accelerator = std::make_unique(); +static constexpr std::array ACCELERATOR_COEFS = {}; // all zeros + +void AESndUCode::SetUpAccelerator(u16 format, [[maybe_unused]] u16 gain) +{ + // setup_accl + s_accelerator->SetSampleFormat(format); + // not currently implemented, but it doesn't matter since the gain is configured to be a no-op + // s_accelerator->SetGain(gain); + s_accelerator->SetStartAddress(m_parameter_block.buf_start); + s_accelerator->SetEndAddress(m_parameter_block.buf_end); + s_accelerator->SetCurrentAddress(m_parameter_block.buf_curr); + s_accelerator->SetYn1(m_parameter_block.yn1); + s_accelerator->SetYn2(m_parameter_block.yn2); + s_accelerator->SetPredScale(m_parameter_block.pds); + // All of the coefficients (COEF_A1_0 at ffa0 - COEF_A2_7 at ffaf) are set to 0 +} + +void AESndUCode::DoMixing() +{ + const u32 pause_flag = (m_crc == HASH_2020) ? VOICE_PAUSE_NEW : VOICE_PAUSE_OLD; + const u32 format_mask = (m_crc == HASH_2020) ? VOICE_FORMAT_MASK_NEW : VOICE_FORMAT_MASK_OLD; + // dsp_mixer + const bool paused = (m_parameter_block.flags & pause_flag) != 0; + const bool running = (m_parameter_block.flags & VOICE_RUNNING) != 0; + const bool has_buf = m_parameter_block.buf_start != 0; + if (!paused && running && has_buf) + { + // no_change_buffer + const u32 voice_format = m_parameter_block.flags & format_mask; + const bool is_16_bit = (voice_format & VOICE_16_BIT_FLAG) != 0; + // select_format table + const u16 accelerator_format = is_16_bit ? ACCELERATOR_FORMAT_16_BIT : ACCELERATOR_FORMAT_8_BIT; + const u16 accelerator_gain = is_16_bit ? ACCELERATOR_GAIN_16_BIT : ACCELERATOR_GAIN_8_BIT; + SetUpAccelerator(accelerator_format, accelerator_gain); + // read from select_mixer skipped + + // Note: We don't handle masking/sign extension from use of $ac0.h and $ac1.h, as this isn't + // something that should happen if the PowerPC code is using the library normally. + // This applies throughout. + u32 delay = m_parameter_block.delay; + u32 sample_index = 0; + u32 remaining_samples = NUM_OUTPUT_SAMPLES; + + if (delay != 0) + { + // delay_loop + // This is implemented with a BLOOP and then a read from the loop counter stack register in + // the actual uCode. I'm not 100% sure how it works/if it works correctly there. + while (remaining_samples != 0) + { + remaining_samples--; + delay--; + if (delay == 0) + break; + sample_index++; + } + // exit_delay + m_parameter_block.delay = delay; + } + // no_delay - operates in set40 mode + while (remaining_samples != 0) // BLOOP dspmixer_loop_end + { + remaining_samples--; + s32 right = m_output_buffer[sample_index * 2 + 0]; + s32 left = m_output_buffer[sample_index * 2 + 1]; + right += m_parameter_block.right; + left += m_parameter_block.left; + // Clamping from set40 mode + right = std::clamp(right, -32768, 32767); + left = std::clamp(left, -32768, 32767); + m_output_buffer[sample_index * 2 + 0] = right; + m_output_buffer[sample_index * 2 + 1] = left; + sample_index++; + + const u32 counter = static_cast(m_parameter_block.counter) + m_parameter_block.freq; + m_parameter_block.counter = static_cast(counter); + u16 counter_h = counter >> 16; + if (counter_h >= 1) + { + s16 new_l = 0, new_r = 0; + + // jrge $ar3 (using select_mixer table) + // We already masked voice_format with the right mask for the version, so nothing special + // needs to be done for this switch statement to exclude unsupported formats + switch (voice_format) + { + case VOICE_MONO8: + case VOICE_MONO16: + // mono_mix + while (counter_h >= 1) + { + counter_h--; + new_r = s_accelerator->Read(ACCELERATOR_COEFS.data()); + new_l = new_r; + } + break; + + case VOICE_STEREO8: + case VOICE_STEREO16: + // stereo_mix + while (counter_h >= 1) + { + counter_h--; + new_r = s_accelerator->Read(ACCELERATOR_COEFS.data()); + new_l = s_accelerator->Read(ACCELERATOR_COEFS.data()); + } + break; // falls through to mix_samples normally + + case VOICE_MONO8_UNSIGNED: + case VOICE_MONO16_UNSIGNED: + // mono_unsigned_mix + while (counter_h >= 1) + { + counter_h--; + new_r = s_accelerator->Read(ACCELERATOR_COEFS.data()); + new_l = new_r; + } + new_r ^= 0x8000; + new_l ^= 0x8000; + break; + + case VOICE_STEREO8_UNSIGNED: + case VOICE_STEREO16_UNSIGNED: + // stereo_unsigned_mix + while (counter_h >= 1) + { + counter_h--; + new_r = s_accelerator->Read(ACCELERATOR_COEFS.data()); + new_l = s_accelerator->Read(ACCELERATOR_COEFS.data()); + } + new_r ^= 0x8000; + new_l ^= 0x8000; + break; + } + if (m_crc == HASH_2012 || m_crc == HASH_2020) + { + // The 2012 version swapped the left and right input channels so that left comes first, + // and then right. Before, right came before left. The 2012 version didn't update comments + // for it, though. + std::swap(new_r, new_l); + } + // mix_samples + const s32 mixed_l = (static_cast(new_l) * m_parameter_block.volume_l) >> 8; + const s32 mixed_r = (static_cast(new_r) * m_parameter_block.volume_r) >> 8; + // Clamping from set40 mode + m_parameter_block.left = std::clamp(mixed_l, -32768, 32767); + m_parameter_block.right = std::clamp(mixed_r, -32768, 32767); + } + // no_mix - we don't need to do anything as we modify m_parameter_block.left/right in place + } + // mixer_end - back to set16 mode + m_parameter_block.pds = s_accelerator->GetPredScale(); + m_parameter_block.yn2 = s_accelerator->GetYn2(); + m_parameter_block.yn1 = s_accelerator->GetYn1(); + m_parameter_block.buf_curr = s_accelerator->GetCurrentAddress(); + } + // finish_voice + m_parameter_block.flags |= VOICE_FINISHED; + DMAOutParameterBlock(); // dma_pb_block + m_mail_handler.PushMail(DSP_SYNC, true); +} + +void AESndUCode::DoState(PointerWrap& p) +{ + DoStateShared(p); + p.Do(m_next_mail_is_parameter_block_addr); + p.Do(m_parameter_block_addr); + p.Do(m_parameter_block); + p.Do(m_output_buffer); + s_accelerator->DoState(p); +} +} // namespace DSP::HLE diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h new file mode 100644 index 0000000000..a13046be94 --- /dev/null +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h @@ -0,0 +1,74 @@ +// Copyright 2022 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include + +#include "Common/CommonTypes.h" +#include "Core/HW/DSPHLE/UCodes/UCodes.h" + +namespace DSP::HLE +{ +class DSPHLE; + +class AESndUCode final : public UCodeInterface +{ +public: + AESndUCode(DSPHLE* dsphle, u32 crc); + + void Initialize() override; + void HandleMail(u32 mail) override; + void Update() override; + void DoState(PointerWrap& p) override; + +private: + void DMAInParameterBlock(); + void DMAOutParameterBlock(); + void SetUpAccelerator(u16 format, u16 gain); + void DoMixing(); + + // Copied from libaesnd/aesndlib.c's aesndpb_t (specifically the first 64 bytes) +#pragma pack(1) + struct ParameterBlock + { + u32 out_buf; + + u32 buf_start; + u32 buf_end; + u32 buf_curr; + + u16 yn1; + u16 yn2; + u16 pds; + + // Note: only u16-aligned, not u32-aligned. + // libogc's version has separate u16 freq_l and freq_h fields, but we use #pragma pack(1). + u32 freq; + u16 counter; + + s16 left; + s16 right; + u16 volume_l; + u16 volume_r; + + u32 delay; + + u32 flags; + u8 _pad[20]; + }; +#pragma pack() + static_assert(sizeof(ParameterBlock) == sizeof(u16) * 0x20); + + bool m_next_mail_is_parameter_block_addr = false; + u32 m_parameter_block_addr = 0; + + ParameterBlock m_parameter_block{}; + + // Number of 16-bit stereo samples in the output buffer: 2ms of sample data + static constexpr u32 NUM_OUTPUT_SAMPLES = 96; + + std::array m_output_buffer{}; +}; +} // namespace DSP::HLE diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/UCodes.cpp b/Source/Core/Core/HW/DSPHLE/UCodes/UCodes.cpp index 892c67dad7..94a594aec7 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/UCodes.cpp +++ b/Source/Core/Core/HW/DSPHLE/UCodes/UCodes.cpp @@ -20,6 +20,7 @@ #include "Core/ConfigManager.h" #include "Core/DSP/DSPCodeUtil.h" #include "Core/HW/DSPHLE/DSPHLE.h" +#include "Core/HW/DSPHLE/UCodes/AESnd.h" #include "Core/HW/DSPHLE/UCodes/ASnd.h" #include "Core/HW/DSPHLE/UCodes/AX.h" #include "Core/HW/DSPHLE/UCodes/AXWii.h" @@ -290,6 +291,12 @@ std::unique_ptr UCodeFactory(u32 crc, DSPHLE* dsphle, bool wii) INFO_LOG_FMT(DSPHLE, "CRC {:08x}: ASnd chosen (Homebrew)", crc); return std::make_unique(dsphle, crc); + case 0x008366af: + case 0x078066ab: + case 0x84c680a9: + INFO_LOG_FMT(DSPHLE, "CRC {:08x}: AESnd chosen (Homebrew)", crc); + return std::make_unique(dsphle, crc); + default: if (wii) { diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 13d1525b74..af170d382e 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -238,6 +238,7 @@ + @@ -858,6 +859,7 @@ +