Merge pull request #10766 from xperia64/dsp_format_fun

DSP accelerator improvements
This commit is contained in:
JMC47
2025-05-09 19:04:23 -04:00
committed by GitHub
18 changed files with 567 additions and 275 deletions

View File

@ -11,86 +11,152 @@
namespace DSP namespace DSP
{ {
u16 Accelerator::ReadD3() u16 Accelerator::GetCurrentSample()
{ {
u16 val = 0; u16 val = 0;
// The lower two bits of the sample format indicate the access size
switch (m_sample_format) switch (m_sample_format.size)
{ {
case 0x5: // u8 reads case FormatSize::Size4Bit:
val = ReadMemory(m_current_address >> 1);
if (m_current_address & 1)
val &= 0xf;
else
val >>= 4;
break;
case FormatSize::Size8Bit:
val = ReadMemory(m_current_address); val = ReadMemory(m_current_address);
m_current_address++;
break; break;
case 0x6: // u16 reads case FormatSize::Size16Bit:
val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1); val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
m_current_address++;
break; break;
default: default: // produces garbage, but affects the current address
ERROR_LOG_FMT(DSPLLE, "dsp_read_aram_d3() - unknown format {:#x}", m_sample_format); ERROR_LOG_FMT(DSPLLE, "GetCurrentSample() - bad format {:#x}", m_sample_format.hex);
break; break;
} }
if (m_current_address >= m_end_address)
{
// Set address back to start address. (never seen this here!)
m_current_address = m_start_address;
}
return val; return val;
} }
void Accelerator::WriteD3(u16 value) u16 Accelerator::ReadRaw()
{
u16 val = GetCurrentSample();
if (m_sample_format.size != FormatSize::SizeInvalid)
{
m_current_address++;
}
else
{
m_current_address = (m_current_address & ~3) | ((m_current_address + 1) & 3);
}
// There are edge cases that are currently not handled here in u4 and u8 mode
// In u8 mode, if ea & 1 == 0 and ca == ea + 1, the accelerator can be read twice. Upon the second
// read, the data returned appears to be the other half of the u16 from the first read, and the
// DSP resets the current address and throws exception 3
// During these reads, ca is not incremented normally.
// Instead, incrementing ca works like this: ca = (ca & ~ 3) | ((ca + 1) & 3)
// u4 mode extends this further.
// When ea & 3 == 0, and ca in [ea + 1, ea + 3], the accelerator can be read (4 - (ca - ea - 1))
// times. On the last read, the data returned appears to be the remaining nibble of the u16 from
// the first read, and the DSP resets the current address and throws exception 3
// When ea & 3 == 1, and ca in [ea + 1, ea + 2], the accelerator can be read (4 - (ca - ea - 1))
// times. On the last read, the data returned appears to be the remaining nibble of the u16 from
// the first read, and the DSP resets the current address and throws exception 3
// When ea & 3 == 2, and ca == ea + 1, the accelerator can be read 4 times. On the last read, the
// data returned appears to be the remaining nibble of the u16 from the first read, and the DSP
// resets the current address and throws exception 3
// There are further edge cases if ca, ea, and potentially other registers are adjusted during
// this pre-reset phase
// The cleanest way to emulate the normal non-edge behavior is to only reset things if we just
// read the end address. If the current address is larger than the end address (and not in the
// edge range), it ignores the end address
if (m_current_address - 1 == m_end_address)
{
// Set address back to start address (confirmed on hardware)
m_current_address = m_start_address;
OnRawReadEndException();
}
SetCurrentAddress(m_current_address);
return val;
}
void Accelerator::WriteRaw(u16 value)
{ {
// Zelda ucode writes a bunch of zeros to ARAM through d3 during // Zelda ucode writes a bunch of zeros to ARAM through d3 during
// initialization. Don't know if it ever does it later, too. // initialization. Don't know if it ever does it later, too.
// Pikmin 2 Wii writes non-stop to 0x10008000-0x1000801f (non-zero values too) // Pikmin 2 Wii writes non-stop to 0x10008000-0x1000801f (non-zero values too)
// Zelda TP Wii writes non-stop to 0x10000000-0x1000001f (non-zero values too) // Zelda TP Wii writes non-stop to 0x10000000-0x1000001f (non-zero values too)
switch (m_sample_format) // Writes only seem to be accepted when the upper most bit of the address is set
if (m_current_address & 0x80000000)
{ {
case 0xA: // u16 writes // The format doesn't matter for raw writes; all writes are u16 and the address is treated as if
// we are in a 16-bit format
WriteMemory(m_current_address * 2, value >> 8); WriteMemory(m_current_address * 2, value >> 8);
WriteMemory(m_current_address * 2 + 1, value & 0xFF); WriteMemory(m_current_address * 2 + 1, value & 0xFF);
m_current_address++; m_current_address++;
break; OnRawWriteEndException();
default: }
ERROR_LOG_FMT(DSPLLE, "dsp_write_aram_d3() - unknown format {:#x}", m_sample_format); else
break; {
ERROR_LOG_FMT(DSPLLE, "WriteRaw() - tried to write to address {:#x} without high bit set",
m_current_address);
} }
} }
u16 Accelerator::Read(const s16* coefs) u16 Accelerator::ReadSample(const s16* coefs)
{ {
if (m_reads_stopped) if (m_reads_stopped)
return 0x0000; return 0x0000;
u16 val; if (m_sample_format.unk != 0)
u8 step_size_bytes = 0; {
WARN_LOG_FMT(DSPLLE, "ReadSample() format {:#x} has unknown upper bits set",
m_sample_format.hex);
}
// let's do the "hardware" decode DSP_FORMAT is interesting - the Zelda u16 val = 0;
// ucode seems to indicate that the bottom two bits specify the "read size" u8 step_size = 0;
// and the address multiplier. The bits above that may be things like sign s16 raw_sample;
// extension and do/do not use ADPCM. It also remains to be figured out if (m_sample_format.decode == FormatDecode::MMIOPCMNoInc ||
// whether there's a difference between the usual accelerator "read m_sample_format.decode == FormatDecode::MMIOPCMInc)
// address" and 0xd3.
switch (m_sample_format)
{ {
case 0x00: // ADPCM audio // The addresses can be complete nonsense in either of these modes
raw_sample = m_input;
}
else
{ {
raw_sample = GetCurrentSample();
}
int coef_idx = (m_pred_scale >> 4) & 0x7;
s32 coef1 = coefs[coef_idx * 2 + 0];
s32 coef2 = coefs[coef_idx * 2 + 1];
switch (m_sample_format.decode)
{
case FormatDecode::ADPCM: // ADPCM audio
{
// ADPCM really only supports 4-bit decoding, but for larger values on hardware, it just ignores
// the upper 12 bits
raw_sample &= 0xF;
int scale = 1 << (m_pred_scale & 0xF); int scale = 1 << (m_pred_scale & 0xF);
int coef_idx = (m_pred_scale >> 4) & 0x7;
s32 coef1 = coefs[coef_idx * 2 + 0]; if (raw_sample >= 8)
s32 coef2 = coefs[coef_idx * 2 + 1]; raw_sample -= 16;
int temp = (m_current_address & 1) ? (ReadMemory(m_current_address >> 1) & 0xF) : s32 val32 = (scale * raw_sample) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11);
(ReadMemory(m_current_address >> 1) >> 4);
if (temp >= 8)
temp -= 16;
s32 val32 = (scale * temp) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11);
val = static_cast<s16>(std::clamp<s32>(val32, -0x7FFF, 0x7FFF)); val = static_cast<s16>(std::clamp<s32>(val32, -0x7FFF, 0x7FFF));
step_size_bytes = 2; step_size = 2;
m_yn2 = m_yn1; m_yn2 = m_yn1;
m_yn1 = val; m_yn1 = val;
@ -100,6 +166,7 @@ u16 Accelerator::Read(const s16* coefs)
// the ACCOV exception does not fire at all, the predscale register is not updated, // the ACCOV exception does not fire at all, the predscale register is not updated,
// and if the end address is 16-byte aligned, the DSP loops to start_address + 1 // and if the end address is 16-byte aligned, the DSP loops to start_address + 1
// instead of start_address. // instead of start_address.
// TODO: This probably needs to be adjusted when using 8 or 16-bit accesses.
if ((m_end_address & 0xf) == 0x0 && m_current_address == m_end_address) if ((m_end_address & 0xf) == 0x0 && m_current_address == m_end_address)
{ {
m_current_address = m_start_address + 1; m_current_address = m_start_address + 1;
@ -113,47 +180,54 @@ u16 Accelerator::Read(const s16* coefs)
{ {
m_pred_scale = ReadMemory((m_current_address & ~15) >> 1); m_pred_scale = ReadMemory((m_current_address & ~15) >> 1);
m_current_address += 2; m_current_address += 2;
step_size_bytes += 2; step_size += 2;
} }
break; break;
} }
case 0x0A: // 16-bit PCM audio case FormatDecode::MMIOPCMNoInc:
val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1); case FormatDecode::PCM: // 16-bit PCM audio
case FormatDecode::MMIOPCMInc:
{
// Gain seems to only apply for PCM decoding
u8 gain_shift = 0;
switch (m_sample_format.gain_scale)
{
case FormatGainScale::GainScale2048:
gain_shift = 11; // x / 2048 = x >> 11
break;
case FormatGainScale::GainScale1:
gain_shift = 0; // x / 1 = x >> 0
break;
case FormatGainScale::GainScale65536:
gain_shift = 16; // x / 65536 = x >> 16
break;
default:
ERROR_LOG_FMT(DSPLLE, "ReadSample() invalid gain mode in format {:#x}", m_sample_format.hex);
break;
}
s32 val32 = ((static_cast<s32>(m_gain) * raw_sample) >> gain_shift) +
(((coef1 * m_yn1) >> gain_shift) + ((coef2 * m_yn2) >> gain_shift));
val = static_cast<s16>(val32);
m_yn2 = m_yn1; m_yn2 = m_yn1;
m_yn1 = val; m_yn1 = val;
step_size_bytes = 2; step_size = 2;
m_current_address += 1; if (m_sample_format.decode != FormatDecode::MMIOPCMNoInc)
break; {
case 0x19: // 8-bit PCM audio m_current_address += 1;
val = ReadMemory(m_current_address) << 8; }
m_yn2 = m_yn1;
m_yn1 = val;
step_size_bytes = 2;
m_current_address += 1;
break;
default:
ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator() - unknown format {:#x}", m_sample_format);
step_size_bytes = 2;
m_current_address += 1;
val = 0;
break; break;
} }
}
// TODO: Take GAIN into account
// adpcm = 0, pcm8 = 0x100, pcm16 = 0x800
// games using pcm8 : Phoenix Wright Ace Attorney (WiiWare), Megaman 9-10 (WiiWare)
// games using pcm16: GC Sega games, ...
// Check for loop. // Check for loop.
// Somehow, YN1 and YN2 must be initialized with their "loop" values, // YN1 and YN2 need to be initialized with their "loop" values,
// so yeah, it seems likely that we should raise an exception to let // which is usually done upon this exception.
// the DSP program do that, at least if DSP_FORMAT == 0x0A. if (m_current_address == (m_end_address + step_size - 1))
if (m_current_address == (m_end_address + step_size_bytes - 1))
{ {
// Set address back to start address. // Set address back to start address.
m_current_address = m_start_address; m_current_address = m_start_address;
m_reads_stopped = true; m_reads_stopped = true;
OnEndException(); OnSampleReadEndException();
} }
SetCurrentAddress(m_current_address); SetCurrentAddress(m_current_address);
@ -192,7 +266,12 @@ void Accelerator::SetCurrentAddress(u32 address)
void Accelerator::SetSampleFormat(u16 format) void Accelerator::SetSampleFormat(u16 format)
{ {
m_sample_format = format; m_sample_format.hex = format;
}
// Stores the gain that ReadSample() multiplies the raw sample by in the PCM decode modes.
void Accelerator::SetGain(s16 gain)
{
  this->m_gain = gain;
}
void Accelerator::SetYn1(s16 yn1) void Accelerator::SetYn1(s16 yn1)
@ -210,4 +289,10 @@ void Accelerator::SetPredScale(u16 pred_scale)
{ {
m_pred_scale = pred_scale & 0x7f; m_pred_scale = pred_scale & 0x7f;
} }
// Stores the value that ReadSample() uses as its raw sample in the two MMIO PCM decode modes,
// where the sample is taken from this register instead of being fetched via ReadMemory().
void Accelerator::SetInput(u16 input)
{
  this->m_input = input;
}
} // namespace DSP } // namespace DSP

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include "Common/BitField.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
class PointerWrap; class PointerWrap;
@ -14,41 +15,85 @@ class Accelerator
public: public:
virtual ~Accelerator() = default; virtual ~Accelerator() = default;
u16 Read(const s16* coefs); u16 ReadSample(const s16* coefs);
// Zelda ucode reads ARAM through 0xffd3. // Zelda ucode reads ARAM through 0xffd3.
u16 ReadD3(); u16 ReadRaw();
void WriteD3(u16 value); void WriteRaw(u16 value);
u32 GetStartAddress() const { return m_start_address; } u32 GetStartAddress() const { return m_start_address; }
u32 GetEndAddress() const { return m_end_address; } u32 GetEndAddress() const { return m_end_address; }
u32 GetCurrentAddress() const { return m_current_address; } u32 GetCurrentAddress() const { return m_current_address; }
u16 GetSampleFormat() const { return m_sample_format; } u16 GetSampleFormat() const { return m_sample_format.hex; }
s16 GetGain() const { return m_gain; }
s16 GetYn1() const { return m_yn1; } s16 GetYn1() const { return m_yn1; }
s16 GetYn2() const { return m_yn2; } s16 GetYn2() const { return m_yn2; }
u16 GetPredScale() const { return m_pred_scale; } u16 GetPredScale() const { return m_pred_scale; }
u16 GetInput() const { return m_input; }
void SetStartAddress(u32 address); void SetStartAddress(u32 address);
void SetEndAddress(u32 address); void SetEndAddress(u32 address);
void SetCurrentAddress(u32 address); void SetCurrentAddress(u32 address);
void SetSampleFormat(u16 format); void SetSampleFormat(u16 format);
void SetGain(s16 gain);
void SetYn1(s16 yn1); void SetYn1(s16 yn1);
void SetYn2(s16 yn2); void SetYn2(s16 yn2);
void SetPredScale(u16 pred_scale); void SetPredScale(u16 pred_scale);
void SetInput(u16 input);
void DoState(PointerWrap& p); void DoState(PointerWrap& p);
protected: protected:
virtual void OnEndException() = 0; virtual void OnRawReadEndException() = 0;
virtual void OnRawWriteEndException() = 0;
virtual void OnSampleReadEndException() = 0;
virtual u8 ReadMemory(u32 address) = 0; virtual u8 ReadMemory(u32 address) = 0;
virtual void WriteMemory(u32 address, u8 value) = 0; virtual void WriteMemory(u32 address, u8 value) = 0;
u16 GetCurrentSample();
// DSP accelerator registers. // DSP accelerator registers.
u32 m_start_address = 0; u32 m_start_address = 0;
u32 m_end_address = 0; u32 m_end_address = 0;
u32 m_current_address = 0; u32 m_current_address = 0;
u16 m_sample_format = 0;
// Access size of a single accelerator sample. Occupies bits 0-1 of the sample format
// (the `size` field of SampleFormat) and selects how GetCurrentSample() fetches data.
enum class FormatSize : u16
{
// One nibble of the byte at (current address >> 1): the low nibble for odd addresses,
// the high nibble for even addresses.
Size4Bit = 0,
// One byte at the current address.
Size8Bit = 1,
// Two bytes at (current address * 2), combined big-endian.
Size16Bit = 2,
// No valid access size: GetCurrentSample() logs an error and returns 0, and ReadRaw() only
// advances the low two bits of the current address.
SizeInvalid = 3
};
// Decoding mode used by ReadSample(). Occupies bits 2-3 of the sample format (the `decode`
// field of SampleFormat). ACCA is the accelerator current address; ACIN is the input register
// set through SetInput().
enum class FormatDecode : u16
{
ADPCM = 0, // ADPCM reads from ARAM, ACCA increments
MMIOPCMNoInc = 1, // PCM Reads from ACIN, ACCA doesn't increment
PCM = 2, // PCM reads from ARAM, ACCA increments
MMIOPCMInc = 3 // PCM reads from ACIN, ACCA increments
};
// When reading samples (at least in PCM mode), they are multiplied by the gain, then divided by
// the value specified here
// Occupies bits 4-5 of the sample format (the `gain_scale` field of SampleFormat). ReadSample()
// implements the division as an arithmetic right shift.
enum class FormatGainScale : u16
{
// Divide by 2048 (shift right by 11).
GainScale2048 = 0,
// Divide by 1 (no shift).
GainScale1 = 1,
// Divide by 65536 (shift right by 16).
GainScale65536 = 2,
// Not a valid scale: ReadSample() logs an error and applies no shift.
GainScaleInvalid = 3
};
// Bitfield view of the 16-bit accelerator sample format value set by SetSampleFormat().
// Example from the PCM test: 0x08 decodes as size = 4-bit, decode = PCM, gain scale = 2048.
union SampleFormat
{
// Raw register value; this is what SetSampleFormat() stores and GetSampleFormat() returns.
u16 hex;
// Bits 0-1: access size of one sample.
BitField<0, 2, FormatSize> size;
// Bits 2-3: decode mode applied by ReadSample().
BitField<2, 2, FormatDecode> decode;
// Bits 4-5: divisor applied after the gain multiplication in the PCM decode modes.
BitField<4, 2, FormatGainScale> gain_scale;
// Bits 6-15: meaning unknown; ReadSample() logs a warning when any of them is set.
BitField<6, 10, u16> unk;
} m_sample_format{0};
s16 m_gain = 0;
s16 m_yn1 = 0; s16 m_yn1 = 0;
s16 m_yn2 = 0; s16 m_yn2 = 0;
u16 m_pred_scale = 0; u16 m_pred_scale = 0;
u16 m_input = 0;
// When an ACCOV is triggered, the accelerator stops reading back anything // When an ACCOV is triggered, the accelerator stops reading back anything
// and updating the current address register, unless the YN2 register is written to. // and updating the current address register, unless the YN2 register is written to.

View File

@ -109,7 +109,18 @@ public:
protected: protected:
u8 ReadMemory(u32 address) override { return Host::ReadHostMemory(address); } u8 ReadMemory(u32 address) override { return Host::ReadHostMemory(address); }
void WriteMemory(u32 address, u8 value) override { Host::WriteHostMemory(value, address); } void WriteMemory(u32 address, u8 value) override { Host::WriteHostMemory(value, address); }
void OnEndException() override { m_dsp.SetException(ExceptionType::AcceleratorOverflow); } void OnRawReadEndException() override
{
m_dsp.SetException(ExceptionType::AcceleratorRawReadOverflow);
}
// Invoked by Accelerator::WriteRaw() after it has accepted a raw write; requests the raw write
// overflow exception (exception vector 4) from the DSP.
void OnRawWriteEndException() override
{
  const auto exception_type = ExceptionType::AcceleratorRawWriteOverflow;
  m_dsp.SetException(exception_type);
}
// Invoked by Accelerator::ReadSample() when a sample read reaches the end address; requests the
// sample read overflow exception (exception vector 5) from the DSP.
void OnSampleReadEndException() override
{
  const auto exception_type = ExceptionType::AcceleratorSampleReadOverflow;
  m_dsp.SetException(exception_type);
}
private: private:
SDSP& m_dsp; SDSP& m_dsp;

View File

@ -152,10 +152,10 @@ enum : u32
DSP_DSMAH = 0xce, // DSP DMA Address High (External) DSP_DSMAH = 0xce, // DSP DMA Address High (External)
DSP_DSMAL = 0xcf, // DSP DMA Address Low (External) DSP_DSMAL = 0xcf, // DSP DMA Address Low (External)
DSP_FORMAT = 0xd1, // Sample format DSP_FORMAT = 0xd1, // Sample format
DSP_ACUNK = 0xd2, // Set to 3 on my dumps DSP_ACUNK = 0xd2, // Set to 3 on my dumps
DSP_ACDATA1 = 0xd3, // Used only by Zelda ucodes DSP_ACDRAW = 0xd3, // Raw accelerator accesses
DSP_ACSAH = 0xd4, // Start of loop DSP_ACSAH = 0xd4, // Start of loop
DSP_ACSAL = 0xd5, DSP_ACSAL = 0xd5,
DSP_ACEAH = 0xd6, // End of sample (and loop) DSP_ACEAH = 0xd6, // End of sample (and loop)
DSP_ACEAL = 0xd7, DSP_ACEAL = 0xd7,
@ -164,9 +164,9 @@ enum : u32
DSP_PRED_SCALE = 0xda, // ADPCM predictor and scale DSP_PRED_SCALE = 0xda, // ADPCM predictor and scale
DSP_YN1 = 0xdb, DSP_YN1 = 0xdb,
DSP_YN2 = 0xdc, DSP_YN2 = 0xdc,
DSP_ACCELERATOR = 0xdd, // ADPCM accelerator read. Used by AX. DSP_ACDSAMP = 0xdd, // Accelerator sample reads, processed differently depending on FORMAT
DSP_GAIN = 0xde, DSP_GAIN = 0xde,
DSP_ACUNK2 = 0xdf, // Set to 0xc on my dumps DSP_ACIN = 0xdf, // Feeds PCM samples written here
DSP_AMDM = 0xef, // ARAM DMA Request Mask 0: DMA with ARAM unmasked 1: masked DSP_AMDM = 0xef, // ARAM DMA Request Mask 0: DMA with ARAM unmasked 1: masked
@ -226,13 +226,13 @@ enum : u16
// Exception vectors // Exception vectors
enum class ExceptionType enum class ExceptionType
{ {
StackOverflow = 1, // 0x0002 stack under/over flow StackOverflow = 1, // 0x0002 stack under/over flow
EXP_2 = 2, // 0x0004 EXP_2 = 2, // 0x0004
EXP_3 = 3, // 0x0006 AcceleratorRawReadOverflow = 3, // 0x0006 accelerator raw read address overflow
EXP_4 = 4, // 0x0008 AcceleratorRawWriteOverflow = 4, // 0x0008 accelerator raw write address overflow
AcceleratorOverflow = 5, // 0x000a accelerator address overflow AcceleratorSampleReadOverflow = 5, // 0x000a accelerator sample reads address overflow
EXP_6 = 6, // 0x000c EXP_6 = 6, // 0x000c
ExternalInterrupt = 7 // 0x000e external int (message from CPU) ExternalInterrupt = 7 // 0x000e external int (message from CPU)
}; };
enum class Mailbox enum class Mailbox

View File

@ -122,12 +122,6 @@ void SDSP::WriteIFX(u32 address, u16 value)
m_ifx_regs[DSP_DSBL] = 0; m_ifx_regs[DSP_DSBL] = 0;
break; break;
case DSP_GAIN:
if (value != 0)
{
DEBUG_LOG_FMT(DSPLLE, "Gain Written: {:#06x}", value);
}
[[fallthrough]];
case DSP_DSPA: case DSP_DSPA:
case DSP_DSMAH: case DSP_DSMAH:
case DSP_DSMAL: case DSP_DSMAL:
@ -161,6 +155,9 @@ void SDSP::WriteIFX(u32 address, u16 value)
case DSP_FORMAT: case DSP_FORMAT:
m_accelerator->SetSampleFormat(value); m_accelerator->SetSampleFormat(value);
break; break;
case DSP_GAIN:
m_accelerator->SetGain(value);
break;
case DSP_YN1: case DSP_YN1:
m_accelerator->SetYn1(value); m_accelerator->SetYn1(value);
break; break;
@ -170,8 +167,11 @@ void SDSP::WriteIFX(u32 address, u16 value)
case DSP_PRED_SCALE: case DSP_PRED_SCALE:
m_accelerator->SetPredScale(value); m_accelerator->SetPredScale(value);
break; break;
case DSP_ACDATA1: // Accelerator write (Zelda type) - "UnkZelda" case DSP_ACDRAW: // Raw accelerator write
m_accelerator->WriteD3(value); m_accelerator->WriteRaw(value);
break;
case DSP_ACIN:
m_accelerator->SetInput(value);
break; break;
default: default:
@ -231,16 +231,20 @@ u16 SDSP::ReadIFXImpl(u16 address)
return static_cast<u16>(m_accelerator->GetCurrentAddress()); return static_cast<u16>(m_accelerator->GetCurrentAddress());
case DSP_FORMAT: case DSP_FORMAT:
return m_accelerator->GetSampleFormat(); return m_accelerator->GetSampleFormat();
case DSP_GAIN:
return m_accelerator->GetGain();
case DSP_YN1: case DSP_YN1:
return m_accelerator->GetYn1(); return m_accelerator->GetYn1();
case DSP_YN2: case DSP_YN2:
return m_accelerator->GetYn2(); return m_accelerator->GetYn2();
case DSP_PRED_SCALE: case DSP_PRED_SCALE:
return m_accelerator->GetPredScale(); return m_accelerator->GetPredScale();
case DSP_ACCELERATOR: // ADPCM Accelerator reads case DSP_ACDSAMP: // Processed sample accelerator read
return m_accelerator->Read(reinterpret_cast<s16*>(&m_ifx_regs[DSP_COEF_A1_0])); return m_accelerator->ReadSample(reinterpret_cast<s16*>(&m_ifx_regs[DSP_COEF_A1_0]));
case DSP_ACDATA1: // Accelerator reads (Zelda type) - "UnkZelda" case DSP_ACDRAW: // Raw accelerator read
return m_accelerator->ReadD3(); return m_accelerator->ReadRaw();
case DSP_ACIN:
return m_accelerator->GetInput();
default: default:
{ {

View File

@ -400,21 +400,21 @@ const std::array<pdlabel_t, 96> pdlabels =
{0xffcf, "DSMAL", "DSP DMA Mem Address L",}, {0xffcf, "DSMAL", "DSP DMA Mem Address L",},
{0xffd0, "0xffd0",nullptr,}, {0xffd0, "0xffd0",nullptr,},
{0xffd1, "SampleFormat", "SampleFormat",}, {0xffd1, "FORMAT", "Accelerator sample format",},
{0xffd2, "0xffd2",nullptr,}, {0xffd2, "0xffd2",nullptr,},
{0xffd3, "UnkZelda", "Unk Zelda reads/writes from/to it",}, {0xffd3, "ACDRAW", "Accelerator raw read/write from ARAM",},
{0xffd4, "ACSAH", "Accelerator start address H",}, {0xffd4, "ACSAH", "Accelerator start address H",},
{0xffd5, "ACSAL", "Accelerator start address L",}, {0xffd5, "ACSAL", "Accelerator start address L",},
{0xffd6, "ACEAH", "Accelerator end address H",}, {0xffd6, "ACEAH", "Accelerator end address H",},
{0xffd7, "ACEAL", "Accelerator end address L",}, {0xffd7, "ACEAL", "Accelerator end address L",},
{0xffd8, "ACCAH", "Accelerator current address H",}, {0xffd8, "ACCAH", "Accelerator current address H",},
{0xffd9, "ACCAL", "Accelerator current address L",}, {0xffd9, "ACCAL", "Accelerator current address L",},
{0xffda, "pred_scale", "pred_scale",}, {0xffda, "PRED_SCALE", "ADPCM predictor and scale",},
{0xffdb, "yn1", "yn1",}, {0xffdb, "YN1", "ADPCM output history Y[N - 1]",},
{0xffdc, "yn2", "yn2",}, {0xffdc, "YN2", "ADPCM output history Y[N - 2]",},
{0xffdd, "ARAM", "Direct Read from ARAM (uses ADPCM)",}, {0xffdd, "ACDSAMP", "Accelerator processed sample read from ARAM or ACIN",},
{0xffde, "GAIN", "Gain",}, {0xffde, "GAIN", "Gain",},
{0xffdf, "0xffdf", nullptr,}, {0xffdf, "ACIN", "Accelerator MMIO PCM input value",},
{0xffe0, "0xffe0",nullptr,}, {0xffe0, "0xffe0",nullptr,},
{0xffe1, "0xffe1",nullptr,}, {0xffe1, "0xffe1",nullptr,},

View File

@ -246,7 +246,7 @@ AESndAccelerator::AESndAccelerator(DSPManager& dsp) : m_dsp(dsp)
AESndAccelerator::~AESndAccelerator() = default; AESndAccelerator::~AESndAccelerator() = default;
void AESndAccelerator::OnEndException() void AESndAccelerator::OnSampleReadEndException()
{ {
// exception5 - this updates internal state // exception5 - this updates internal state
SetYn1(GetYn1()); SetYn1(GetYn1());
@ -266,12 +266,11 @@ void AESndAccelerator::WriteMemory(u32 address, u8 value)
static constexpr std::array<s16, 16> ACCELERATOR_COEFS = {}; // all zeros static constexpr std::array<s16, 16> ACCELERATOR_COEFS = {}; // all zeros
void AESndUCode::SetUpAccelerator(u16 format, [[maybe_unused]] u16 gain) void AESndUCode::SetUpAccelerator(u16 format, u16 gain)
{ {
// setup_accl // setup_accl
m_accelerator.SetSampleFormat(format); m_accelerator.SetSampleFormat(format);
// not currently implemented, but it doesn't matter since the gain is configured to be a no-op m_accelerator.SetGain(gain);
// m_accelerator.SetGain(gain);
m_accelerator.SetStartAddress(m_parameter_block.buf_start); m_accelerator.SetStartAddress(m_parameter_block.buf_start);
m_accelerator.SetEndAddress(m_parameter_block.buf_end); m_accelerator.SetEndAddress(m_parameter_block.buf_end);
m_accelerator.SetCurrentAddress(m_parameter_block.buf_curr); m_accelerator.SetCurrentAddress(m_parameter_block.buf_curr);
@ -372,7 +371,7 @@ void AESndUCode::DoMixing()
while (counter_h >= 1) while (counter_h >= 1)
{ {
counter_h--; counter_h--;
new_r = m_accelerator.Read(ACCELERATOR_COEFS.data()); new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
new_l = new_r; new_l = new_r;
} }
break; break;
@ -383,8 +382,8 @@ void AESndUCode::DoMixing()
while (counter_h >= 1) while (counter_h >= 1)
{ {
counter_h--; counter_h--;
new_r = m_accelerator.Read(ACCELERATOR_COEFS.data()); new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
new_l = m_accelerator.Read(ACCELERATOR_COEFS.data()); new_l = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
} }
break; // falls through to mix_samples normally break; // falls through to mix_samples normally
@ -394,7 +393,7 @@ void AESndUCode::DoMixing()
while (counter_h >= 1) while (counter_h >= 1)
{ {
counter_h--; counter_h--;
new_r = m_accelerator.Read(ACCELERATOR_COEFS.data()); new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
new_l = new_r; new_l = new_r;
} }
new_r ^= 0x8000; new_r ^= 0x8000;
@ -407,8 +406,8 @@ void AESndUCode::DoMixing()
while (counter_h >= 1) while (counter_h >= 1)
{ {
counter_h--; counter_h--;
new_r = m_accelerator.Read(ACCELERATOR_COEFS.data()); new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
new_l = m_accelerator.Read(ACCELERATOR_COEFS.data()); new_l = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
} }
new_r ^= 0x8000; new_r ^= 0x8000;
new_l ^= 0x8000; new_l ^= 0x8000;

View File

@ -30,7 +30,9 @@ public:
~AESndAccelerator(); ~AESndAccelerator();
protected: protected:
void OnEndException() override; void OnRawReadEndException() override {}
void OnRawWriteEndException() override {}
void OnSampleReadEndException() override;
u8 ReadMemory(u32 address) override; u8 ReadMemory(u32 address) override;
void WriteMemory(u32 address, u8 value) override; void WriteMemory(u32 address, u8 value) override;

View File

@ -134,7 +134,9 @@ public:
PB_TYPE* acc_pb = nullptr; PB_TYPE* acc_pb = nullptr;
protected: protected:
void OnEndException() override void OnRawReadEndException() override {}
void OnRawWriteEndException() override {}
void OnSampleReadEndException() override
{ {
if (acc_pb->audio_addr.looping) if (acc_pb->audio_addr.looping)
{ {
@ -181,6 +183,7 @@ void AcceleratorSetup(HLEAccelerator* accelerator, PB_TYPE* pb)
accelerator->SetSampleFormat(pb->audio_addr.sample_format); accelerator->SetSampleFormat(pb->audio_addr.sample_format);
accelerator->SetYn1(pb->adpcm.yn1); accelerator->SetYn1(pb->adpcm.yn1);
accelerator->SetYn2(pb->adpcm.yn2); accelerator->SetYn2(pb->adpcm.yn2);
accelerator->SetGain(pb->adpcm.gain);
accelerator->SetPredScale(pb->adpcm.pred_scale); accelerator->SetPredScale(pb->adpcm.pred_scale);
} }
@ -189,7 +192,7 @@ void AcceleratorSetup(HLEAccelerator* accelerator, PB_TYPE* pb)
// by the accelerator on real hardware). // by the accelerator on real hardware).
u16 AcceleratorGetSample(HLEAccelerator* accelerator) u16 AcceleratorGetSample(HLEAccelerator* accelerator)
{ {
return accelerator->Read(accelerator->acc_pb->adpcm.coefs); return accelerator->ReadSample(accelerator->acc_pb->adpcm.coefs);
} }
// Reads samples from the input callback, resamples them to <count> samples at // Reads samples from the input callback, resamples them to <count> samples at

View File

@ -9,15 +9,9 @@ lri $AC0.L, #0x0000 ; start
lri $AC1.M, #0x0000 ; end lri $AC1.M, #0x0000 ; end
lri $AC1.L, #0x0011 ; end lri $AC1.L, #0x0011 ; end
; Reset some registers
lri $AC0.H, #0xffff
sr @0xffda, $AC0.H ; pred scale
sr @0xffdb, $AC0.H ; yn1
sr @0xffdc, $AC0.H ; yn2
; Set the sample format ; Set the sample format
lri $AC0.H, #0x0 lri $AC0.H, #0x0
sr @0xffd1, $AC0.H sr @FORMAT, $AC0.H
; Set the starting and current address ; Set the starting and current address
srs @ACSAH, $AC0.M srs @ACSAH, $AC0.M
srs @ACCAH, $AC0.M srs @ACCAH, $AC0.M
@ -27,25 +21,32 @@ srs @ACCAL, $AC0.L
srs @ACEAH, $AC1.M srs @ACEAH, $AC1.M
srs @ACEAL, $AC1.L srs @ACEAL, $AC1.L
; Reset some registers (these must be reset after setting FORMAT)
lri $AC0.H, #0xffff
sr @PRED_SCALE, $AC0.H
sr @YN1, $AC0.H
sr @YN2, $AC0.H
call load_hw_reg_to_regs call load_hw_reg_to_regs
call send_back ; check the accelerator regs before a read call send_back ; check the accelerator regs before a read
bloopi #40, end_of_loop bloopi #40, end_of_loop
lr $IX3, @ARAM lr $IX3, @ACDSAMP
call load_hw_reg_to_regs call load_hw_reg_to_regs
call send_back ; after a read call send_back ; after a read
nop ; Loops that end at a return of a call are buggy on hw
end_of_loop: end_of_loop:
nop nop
jmp end_of_test jmp end_of_test
load_hw_reg_to_regs: load_hw_reg_to_regs:
lr $AR0, @0xffd1 ; format lr $AR0, @FORMAT
lr $AR1, @0xffd2 ; unknown lr $AR1, @0xffd2 ; unknown
lr $AR2, @0xffda ; pred scale lr $AR2, @PRED_SCALE
lr $AR3, @0xffdb ; yn1 lr $AR3, @YN1
lr $IX0, @0xffdc ; yn2 lr $IX0, @YN2
lr $IX1, @0xffdf ; unknown accelerator register lr $IX1, @ACIN
lri $AC0.H, #0 lri $AC0.H, #0
lrs $AC0.M, @ACSAH lrs $AC0.M, @ACSAH

View File

@ -0,0 +1,79 @@
incdir "tests"
include "dsp_base.inc"
; Accelerator sample-read test for PCM decoding of 4-bit samples.
; Configures FORMAT, the start/current/end addresses, GAIN and the first coefficient pair,
; clears PRED_SCALE/YN1/YN2, then reads ACDSAMP 40 times. The accelerator registers are
; captured and sent back once before the first read and once after every read.
test_main:
; Test parameters
lri $AC0.M, #0x0000 ; start
lri $AC0.L, #0x0000 ; start
lri $AC1.M, #0x0000 ; end
lri $AC1.L, #0x0011 ; end
; Set the sample format
; 0x08: bits 0-1 = 0 (4-bit size), bits 2-3 = 2 (PCM decode), bits 4-5 = 0 (divide by 2048)
lri $AC0.H, #0x08 ; 4-bit PCM, gain scaling = x / 2048
sr @FORMAT, $AC0.H
; Set the starting and current address
srs @ACSAH, $AC0.M
srs @ACCAH, $AC0.M
srs @ACSAL, $AC0.L
srs @ACCAL, $AC0.L
; Set the ending address
srs @ACEAH, $AC1.M
srs @ACEAL, $AC1.L
; Set the gains
si @GAIN, #0x0800 ; 2048 / 2048 = 1.0
si @COEF_A1_0, #0x0400 ; 1024 / 2048 = 0.5
si @COEF_A2_0, #0x0200 ; 512 / 2048 = 0.25
; Reset some registers (these must be reset after setting FORMAT)
lri $AC0.H, #0x0000
sr @PRED_SCALE, $AC0.H ; use 0th coefficients
sr @YN1, $AC0.H
sr @YN2, $AC0.H
call load_hw_reg_to_regs
call send_back ; check the accelerator regs before a read
; Expected read sequence
; r[0] = (data[0] >> 4) + (0/2) + (0/4)
; r[1] = (data[0] & 0xf) + (r[0]/2) + (0/4)
; r[2] = (data[1] >> 4) + (r[1]/2) + (r[0]/4)
; r[3] = (data[1] & 0xf) + (r[2]/2) + (r[1]/4)
; ...
; Each iteration stores the decoded sample in $IX3 and reports the register state.
bloopi #40, end_of_loop
lr $IX3, @ACDSAMP
call load_hw_reg_to_regs
call send_back ; after a read
nop ; Loops that end at a return of a call are buggy on hw
end_of_loop:
nop
jmp end_of_test
; Copies the accelerator hardware registers into DSP registers so that the caller can
; report them (every call site in this file follows this routine with send_back).
; Register mapping on return:
;   $AR0 = FORMAT, $AR1 = 0xffd2, $AR2 = PRED_SCALE, $AR3 = YN1, $IX0 = YN2, $IX1 = ACIN
;   $AC0 = start address (high part cleared), $AC1 = end address (high part cleared)
;   $AX0 and $AX1 = current address
; Overwrites $AC0.H, so callers that keep a value there must reload it afterwards.
load_hw_reg_to_regs:
lr $AR0, @FORMAT
lr $AR1, @0xffd2 ; unknown
lr $AR2, @PRED_SCALE
lr $AR3, @YN1
lr $IX0, @YN2
lr $IX1, @ACIN
lri $AC0.H, #0
lrs $AC0.M, @ACSAH
lrs $AC0.L, @ACSAL
lri $AC1.H, #0
lrs $AC1.M, @ACEAH
lrs $AC1.L, @ACEAL
lrs $AX0.H, @ACCAH
lrs $AX0.L, @ACCAL
; NOTE(review): $AX1 is loaded from ACCAH/ACCAL twice in a row below; it is not clear from
; this file whether the repeated read is intentional - confirm before removing.
lrs $AX1.H, @ACCAH
lrs $AX1.L, @ACCAL
lrs $AX1.H, @ACCAH
lrs $AX1.L, @ACCAL
ret

View File

@ -0,0 +1,112 @@
incdir "tests"
include "dsp_base.inc"
; To use: set up a buffer in main_spy,
; then modify the start, current, and ending addresses
; and verify things look correct
; Raw read test.
; Input: $AC0.H = value to store in FORMAT (it must be set by the caller before every call,
; because load_hw_reg_to_regs clears $AC0.H).
; Sets the start and current address to 0 and the end address to 0x11, reports the register
; state, then performs 4 reads from ACDRAW and reports the register state after each one.
loop_read_test:
; Set the sample format
sr @FORMAT, $AC0.H
; Test parameters
lri $AC0.M, #0x0000 ; start
lri $AC0.L, #0x0000 ; start
lri $AC1.M, #0x0000 ; end
lri $AC1.L, #0x0011 ; end
; pred scale, coefs, etc do not matter for raw
; Set the starting and current address
srs @ACSAH, $AC0.M
srs @ACCAH, $AC0.M
srs @ACSAL, $AC0.L
srs @ACCAL, $AC0.L
; Set the ending address
srs @ACEAH, $AC1.M
srs @ACEAL, $AC1.L
call load_hw_reg_to_regs
call send_back ; check the accelerator regs before a read
; The value read is kept in $IX3, which load_hw_reg_to_regs does not overwrite.
bloopi #4, end_of_read_loop
lr $IX3, @ACDRAW ; Raw reads
call load_hw_reg_to_regs
call send_back ; after a read
nop
end_of_read_loop:
nop
ret
; Raw write test.
; Input: $AC0.H = value to store in FORMAT (it must be set by the caller before every call,
; because load_hw_reg_to_regs clears $AC0.H).
; Sets the start and current address to 0 and the end address to 0x11, reports the register
; state, then performs 4 writes of $IX3 to ACDRAW and reports the register state after each.
; NOTE(review): $IX3 is not initialised in this routine; presumably it still holds the last
; value read by loop_read_test - confirm.
; NOTE(review): the emulator's WriteRaw() only accepts writes when the uppermost bit of the
; current address is set, which is not the case for the default addresses used here - verify
; whether the addresses are meant to be edited before running, as the note at the top of
; this test suggests.
loop_write_test:
; Set the sample format
sr @FORMAT, $AC0.H
; Test parameters
lri $AC0.M, #0x0000 ; start
lri $AC0.L, #0x0000 ; start
lri $AC1.M, #0x0000 ; end
lri $AC1.L, #0x0011 ; end
; pred scale, coefs, etc do not matter for raw
; Set the starting and current address
srs @ACSAH, $AC0.M
srs @ACCAH, $AC0.M
srs @ACSAL, $AC0.L
srs @ACCAL, $AC0.L
; Set the ending address
srs @ACEAH, $AC1.M
srs @ACEAL, $AC1.L
call load_hw_reg_to_regs
call send_back ; check the accelerator regs before a write
bloopi #4, end_of_write_loop
sr @ACDRAW, $IX3 ; Raw writes
call load_hw_reg_to_regs
call send_back ; after a write
nop
end_of_write_loop:
nop
ret
; Entry point: runs the raw read test and then the raw write test once for each of the
; FORMAT values 0x00, 0x01 and 0x02. $AC0.H carries the FORMAT value into each routine and is
; reloaded before every call.
test_main:
lri $AC0.H, #0x00 ; 4-bit
call loop_read_test
lri $AC0.H, #0x01 ; 8-bit
call loop_read_test
lri $AC0.H, #0x02 ; 16-bit
call loop_read_test
lri $AC0.H, #0x00 ; "4-bit", but all writes are 16-bits
call loop_write_test
lri $AC0.H, #0x01 ; "8-bit", but all writes are 16-bits
call loop_write_test
lri $AC0.H, #0x02 ; 16-bit
call loop_write_test
jmp end_of_test
; Copies the accelerator hardware registers into DSP registers so that the caller can
; report them (every call site in this file follows this routine with send_back).
; Register mapping on return:
;   $AR0 = FORMAT, $AR1 = 0xffd2, $AR2 = PRED_SCALE, $AR3 = YN1, $IX0 = YN2, $IX1 = ACIN
;   $AC0 = start address (high part cleared), $AC1 = end address (high part cleared)
;   $AX0 and $AX1 = current address
; Overwrites $AC0.H, which is why test_main reloads the FORMAT value before each call.
load_hw_reg_to_regs:
lr $AR0, @FORMAT
lr $AR1, @0xffd2 ; unknown
lr $AR2, @PRED_SCALE
lr $AR3, @YN1
lr $IX0, @YN2
lr $IX1, @ACIN
lri $AC0.H, #0
lrs $AC0.M, @ACSAH
lrs $AC0.L, @ACSAL
lri $AC1.H, #0
lrs $AC1.M, @ACEAH
lrs $AC1.L, @ACEAL
lrs $AX0.H, @ACCAH
lrs $AX0.L, @ACCAL
; NOTE(review): $AX1 is loaded from ACCAH/ACCAL twice in a row below; it is not clear from
; this file whether the repeated read is intentional - confirm before removing.
lrs $AX1.H, @ACCAH
lrs $AX1.L, @ACCAL
lrs $AX1.H, @ACCAH
lrs $AX1.L, @ACCAL
ret

View File

@ -12,7 +12,7 @@ include "dsp_base.inc"
; AC1.M/L: end address ; AC1.M/L: end address
test_accelerator_addrs_ex: test_accelerator_addrs_ex:
; Set the sample format ; Set the sample format
sr @0xffd1, $AC0.H sr @FORMAT, $AC0.H
; Set the accelerator start and current address. ; Set the accelerator start and current address.
srs @ACSAH, $AC0.M srs @ACSAH, $AC0.M
@ -35,8 +35,8 @@ test_accelerator_addrs_ex:
lrs $AX0.L, @ACCAL lrs $AX0.L, @ACCAL
; Make the accelerator read memory ; Make the accelerator read memory
lrs $AX1.H, @ARAM lrs $AX1.H, @ACDSAMP
lrs $AX1.H, @ARAM lrs $AX1.H, @ACDSAMP
; AX1 -> new current position after read ; AX1 -> new current position after read
lrs $AX1.H, @ACCAH lrs $AX1.H, @ACCAH
lrs $AX1.L, @ACCAL lrs $AX1.L, @ACCAL

View File

@ -142,10 +142,10 @@ irq5:
si @DMBL, #0x0000 si @DMBL, #0x0000
si @DIRQ, #0x0001 si @DIRQ, #0x0001
lri $ac0.m, #0xbbbb lri $ac0.m, #0xbbbb
sr @0xffda, $ac0.m ; pred scale sr @PRED_SCALE, $ac0.m
sr @0xffdb, $ac0.m ; yn1 sr @YN1, $ac0.m
lr $ix2, @ARAM lr $ix2, @ACDSAMP
sr @0xffdc, $ac0.m ; yn2 sr @YN2, $ac0.m
rti rti
irq6: irq6:
lri $ac0.m, #0x0006 lri $ac0.m, #0x0006

View File

@ -18,7 +18,7 @@ call send_back
CLR $ACC0 CLR $ACC0
CLRP CLRP
SET15 SET15
LRI $AX0.L, #0xFFFF LRI $AX0.L, #0xFFFF
LRI $AX1.H, #0x100 LRI $AX1.H, #0x100
MULXMVZ $AX0.L, $AX1.H, $ACC0 ; UNSIGNED MULXMVZ $AX0.L, $AX1.H, $ACC0 ; UNSIGNED
@ -37,7 +37,7 @@ call send_back
CLR $ACC0 CLR $ACC0
CLRP CLRP
SET15 SET15
LRI $AX0.L, #0xFFFF LRI $AX0.L, #0xFFFF
LRI $AX1.H, #0x100 LRI $AX1.H, #0x100
MULXMV $AX0.L, $AX1.H, $ACC0 ; UNSIGNED MULXMV $AX0.L, $AX1.H, $ACC0 ; UNSIGNED
@ -56,7 +56,7 @@ call send_back
CLR $ACC0 CLR $ACC0
CLRP CLRP
SET15 SET15
LRI $AX0.L, #0xFFFF LRI $AX0.L, #0xFFFF
LRI $AX1.H, #0x100 LRI $AX1.H, #0x100
MULXAC $AX0.L, $AX1.H, $ACC0 ; UNSIGNED MULXAC $AX0.L, $AX1.H, $ACC0 ; UNSIGNED
@ -75,7 +75,7 @@ call send_back
CLR $ACC0 CLR $ACC0
CLRP CLRP
SET15 SET15
LRI $AX0.L, #0xFFFF LRI $AX0.L, #0xFFFF
LRI $AX1.H, #0x100 LRI $AX1.H, #0x100
MULX $AX0.L, $AX1.H ; UNSIGNED MULX $AX0.L, $AX1.H ; UNSIGNED
@ -95,7 +95,7 @@ call send_back
CLR $ACC0 CLR $ACC0
CLRP CLRP
SET15 SET15
LRI $AX0.L, #0xFFFF LRI $AX0.L, #0xFFFF
LRI $AX1.L, #0x100 LRI $AX1.L, #0x100
MADDX $AX0.L, $AX1.L ; SIGNED (!) MADDX $AX0.L, $AX1.L ; SIGNED (!)
@ -115,7 +115,7 @@ call send_back
CLR $ACC0 CLR $ACC0
CLRP CLRP
SET15 SET15
LRI $AC0.M, #0xFFFF LRI $AC0.M, #0xFFFF
LRI $AX0.H, #0x100 LRI $AX0.H, #0x100
MULC $AC0.M, $AX0.H ; SIGNED (!) MULC $AC0.M, $AX0.H ; SIGNED (!)
@ -135,7 +135,7 @@ call send_back
CLR $ACC0 CLR $ACC0
CLRP CLRP
SET15 SET15
LRI $AC0.M, #0xFFFF LRI $AC0.M, #0xFFFF
LRI $AX0.H, #0x100 LRI $AX0.H, #0x100
MULCAC $AC0.M, $AX0.H, $ACC0 ; SIGNED (!) MULCAC $AC0.M, $AX0.H, $ACC0 ; SIGNED (!)
@ -154,7 +154,7 @@ MOVP $ACC0
call send_back call send_back
CLR $ACC0 CLR $ACC0
SET15 SET15
LRI $AX0.L, #0xFFFF LRI $AX0.L, #0xFFFF
LRI $AX0.H, #0x100 LRI $AX0.H, #0x100
MUL $AX0.L, $AX0.H ; SIGNED (!) MUL $AX0.L, $AX0.H ; SIGNED (!)
@ -173,7 +173,7 @@ MOVP $ACC0
call send_back call send_back
CLR $ACC0 CLR $ACC0
SET15 SET15
LRI $AX0.L, #0xFFFF LRI $AX0.L, #0xFFFF
LRI $AX0.H, #0x100 LRI $AX0.H, #0x100
MULAC $AX0.L, $AX0.H, $ACC0 ; SIGNED (!) MULAC $AX0.L, $AX0.H, $ACC0 ; SIGNED (!)
@ -187,65 +187,3 @@ CLR15
; We're done, DO NOT DELETE THIS LINE ; We're done, DO NOT DELETE THIS LINE
jmp end_of_test jmp end_of_test
; test accelerator
; TODO: DSPSpy puts a 16-bit ramp at 0x10000000
LRIS $AC1.M, #0x0a ; 16-bit PCM audio
;SRS @SampleFormat, $AC1.M
; Start accelerator position
LRI $AC1.M, #0x0100
SRS @ACCAH, $AC1.M
LRI $AC1.M, #0x1000
SRS @ACCAH, $AC1.M
; Current accelerator position
LRI $AC1.M, #0x0100
SRS @ACCAH, $AC1.M
LRI $AC1.M, #0x1000
SRS @ACCAH, $AC1.M
; End accelerator position
LRI $AC1.M, #0x0100
SRS @ACCAH, $AC1.M
LRI $AC1.M, #0x2000
SRS @ACCAH, $AC1.M
; Now to the interesting parameter - gain.
LRI $AC1.M, #0xFFFF
SRS @GAIN, $AC1.M
; Let's now load a sample through the accelerator.
LRS $AC1.M, @ARAM
call send_back
jmp end_of_test
; test addpaxz
call send_back
clrp
lri $AX0.L, #0x1111
lri $AX0.H, #0x2222
call send_back
clrp
addpaxz $ACC0, $AX0.H
call send_back
clrp
set40
addpaxz $ACC0, $AX0.H
set16
call send_back
clrp
set15
addpaxz $ACC0, $AX0.H
clr15
call send_back
jmp end_of_test

View File

@ -14,10 +14,10 @@ include "dsp_base_noirq.inc"
test_main: test_main:
; Use the accelerator to generate an IRQ by setting the start and end address to 0 ; Use the accelerator to generate an IRQ by setting the start and end address to 0
; This will result in an interrupt on every read ; This will result in an interrupt on every read
SI @0xffda, #0 ; pred_scale SI @PRED_SCALE, #0
SI @0xffdb, #0 ; yn1 SI @YN1, #0
SI @0xffdc, #0 ; yn2 SI @YN2, #0
SI @0xffd1, #0 ; SampleFormat SI @FORMAT, #0
SI @ACSAH, #0 SI @ACSAH, #0
SI @ACCAH, #0 SI @ACCAH, #0
SI @ACSAL, #0 SI @ACSAL, #0
@ -27,24 +27,24 @@ test_main:
LRI $AX1.H, #0x0000 LRI $AX1.H, #0x0000
LRS $AX0.L, @ARAM ; Trigger interrupt LRS $AX0.L, @ACDSAMP ; Trigger interrupt
CALL send_back CALL send_back
LRI $AX1.H, #0x0001 LRI $AX1.H, #0x0001
LRS $AX0.L, @ARAM ; Trigger interrupt LRS $AX0.L, @ACDSAMP ; Trigger interrupt
CALL send_back CALL send_back
LRI $AX1.H, #0x0000 LRI $AX1.H, #0x0000
LRS $AX0.L, @ARAM ; Trigger interrupt LRS $AX0.L, @ACDSAMP ; Trigger interrupt
CALL send_back CALL send_back
jmp end_of_test jmp end_of_test
accov_irq: accov_irq:
; Restore registers, otherwise no new interrupt will be generated ; Restore registers, otherwise no new interrupt will be generated
SI @0xffda, #0 ; pred_scale SI @PRED_SCALE, #0
SI @0xffdb, #0 ; yn1 SI @YN1, #0
SI @0xffdc, #0 ; yn2 SI @YN2, #0
TSTAXH $AX1.H TSTAXH $AX1.H
LRI $AX1.L, #0x1111 LRI $AX1.L, #0x1111

View File

@ -17,13 +17,15 @@ public:
{ {
std::array<s16, 16> coefs{}; std::array<s16, 16> coefs{};
m_accov_raised = false; m_accov_raised = false;
return Read(coefs.data()); return ReadSample(coefs.data());
} }
bool EndExceptionRaised() const { return m_accov_raised; } bool EndExceptionRaised() const { return m_accov_raised; }
protected: protected:
void OnEndException() override void OnRawReadEndException() override {}
void OnRawWriteEndException() override {}
void OnSampleReadEndException() override
{ {
EXPECT_TRUE(m_reads_stopped); EXPECT_TRUE(m_reads_stopped);
m_accov_raised = true; m_accov_raised = true;

View File

@ -46,7 +46,7 @@
% Document front page material % Document front page material
\title{\textbf{\Huge GameCube DSP User's Manual}} \title{\textbf{\Huge GameCube DSP User's Manual}}
\author{Reverse-engineered and documented by Duddie \\ \href{mailto:duddie@walla.com}{duddie@walla.com}} \author{Reverse-engineered and documented by Duddie \\ \href{mailto:duddie@walla.com}{duddie@walla.com}}
\date{\today\\v0.1.5} \date{\today\\v0.1.7}
% Title formatting commands % Title formatting commands
\newcommand{\OpcodeTitle}[1]{\subsection{#1}\label{instruction:#1}} \newcommand{\OpcodeTitle}[1]{\subsection{#1}\label{instruction:#1}}
@ -263,6 +263,8 @@ The purpose of this documentation is purely academic and it aims at understandin
0.1.3 & 2022.05.27 & Pokechu22 & Renamed \texttt{CMPAR} instruction to \texttt{CMPAXH} \\ \hline 0.1.3 & 2022.05.27 & Pokechu22 & Renamed \texttt{CMPAR} instruction to \texttt{CMPAXH} \\ \hline
0.1.4 & 2022.06.02 & Pokechu22 & Fixed typos; added sections on 16-bit and 40-bit modes and on main and extended opcode writing to the same register. \\ \hline 0.1.4 & 2022.06.02 & Pokechu22 & Fixed typos; added sections on 16-bit and 40-bit modes and on main and extended opcode writing to the same register. \\ \hline
0.1.5 & 2022.09.29 & vpelletier & Fixed \texttt{BLOOP} and \texttt{BLOOPI} suboperation order \\ \hline 0.1.5 & 2022.09.29 & vpelletier & Fixed \texttt{BLOOP} and \texttt{BLOOPI} suboperation order \\ \hline
0.1.6 & 2022.06.20 & xperia64 & Accelerator documentation updates, fix register typo in ANDC and ORC descriptions \\ \hline
0.1.7 & 2025.04.21 & Tilka & Fixed typos and complained about GFDL \\ \hline
\end{tabular} \end{tabular}
\end{table} \end{table}
@ -387,7 +389,7 @@ You may not copy, modify, sublicense, or distribute the Document except as expre
The Free Software Foundation may publish new, revised versions of the GNU Free Documentation License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. See https://www.gnu.org/licenses/. The Free Software Foundation may publish new, revised versions of the GNU Free Documentation License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. See https://www.gnu.org/licenses/.
Each version of the License is given a distinguishing version number. If the Document specifies that a particular numbered version of this License "or any later version" applies to it, you have the option of following the terms and conditions either of that specified version or of any later version that has been published (not as a draft) by the Free Software Foundation. If the Document does not specify a version number of this License, you may choose any version ever published (not as a draft) by the Free Software Foundation. Each version of the License is given a distinguishing version number. If the Document specifies that a particular numbered version of this License "or any later version" applies to it, you have the option of following the terms and conditions either of that specified version or of any later version that has been published (not as a draft) by the Free Software Foundation. If the Document does not specify a version number of this License, you may choose any version ever published (not as a draft) by the Free Software Foundation.
\pagebreak{} \pagebreak{}
@ -655,15 +657,15 @@ Exception vectors are located at address \Address{0x0000} in Instruction RAM.
\centering \centering
\begin{tabular}{|l|l|l|l|} \begin{tabular}{|l|l|l|l|}
\hline \hline
\textbf{Level} & \textbf{Address} & \textbf{Name} & \textbf{Description} \\ \hline \textbf{Level} & \textbf{Address} & \textbf{Name} & \textbf{Description} \\ \hline
0 & \Address{0x0000} & \texttt{RESET} & \\ \hline 0 & \Address{0x0000} & \texttt{RESET} & \\ \hline
1 & \Address{0x0002} & \texttt{STOVF} & Stack under/overflow \\ \hline 1 & \Address{0x0002} & \texttt{STOVF} & Stack under/overflow \\ \hline
2 & \Address{0x0004} & & \\ \hline 2 & \Address{0x0004} & & \\ \hline
3 & \Address{0x0006} & & \\ \hline 3 & \Address{0x0006} & \texttt{ACRROV} & Accelerator raw read address overflow \\ \hline
4 & \Address{0x0008} & & \\ \hline 4 & \Address{0x0008} & \texttt{ACRWOV} & Accelerator raw write address overflow \\ \hline
5 & \Address{0x000A} & \texttt{ACCOV} & Accelerator address overflow \\ \hline 5 & \Address{0x000A} & \texttt{ACSOV} & Accelerator sample read address overflow \\ \hline
6 & \Address{0x000C} & & \\ \hline 6 & \Address{0x000C} & & \\ \hline
7 & \Address{0x000E} & \texttt{INT} & External interrupt (from CPU) \\ \hline 7 & \Address{0x000E} & \texttt{INT} & External interrupt (from CPU) \\ \hline
\end{tabular} \end{tabular}
\end{table} \end{table}
@ -681,11 +683,11 @@ Hardware registers (IFX) occupy the address space at \Address{0xFFxx} in the Dat
\hline \hline
\textbf{Address} & \textbf{Name} & \textbf{Description} \\ \hline \textbf{Address} & \textbf{Name} & \textbf{Description} \\ \hline
\multicolumn{3}{|l|}{\textit{ADPCM Coefficients}} \\ \hline \multicolumn{3}{|l|}{\textit{ADPCM Coefficients}} \\ \hline
\Address{0xFFA0} & \Register{COEF\_A1\_0} & A1 Coefficient \# 0 \\ \hline \Address{0xFFA0} & \Register{COEF\_A1\_0} & A1 Coefficient \# 0 \\ \hline
\Address{0xFFA1} & \Register{COEF\_A2\_0} & A2 Coefficient \# 0 \\ \hline \Address{0xFFA1} & \Register{COEF\_A2\_0} & A2 Coefficient \# 0 \\ \hline
\multicolumn{3}{|c|}{$\vdots$} \\ \hline \multicolumn{3}{|c|}{$\vdots$} \\ \hline
\Address{0xFFAE} & \Register{COEF\_A1\_7} & A1 Coefficient \# 7 \\ \hline \Address{0xFFAE} & \Register{COEF\_A1\_7} & A1 Coefficient \# 7 \\ \hline
\Address{0xFFAF} & \Register{COEF\_A2\_7} & A2 Coefficient \# 7 \\ \hline \Address{0xFFAF} & \Register{COEF\_A2\_7} & A2 Coefficient \# 7 \\ \hline
\multicolumn{3}{|l|}{\textit{DMA Interface}} \\ \hline \multicolumn{3}{|l|}{\textit{DMA Interface}} \\ \hline
\Address{0xFFC9} & \Register{DSCR} & DMA control \\ \hline \Address{0xFFC9} & \Register{DSCR} & DMA control \\ \hline
\Address{0xFFCB} & \Register{DSBL} & Block length \\ \hline \Address{0xFFCB} & \Register{DSBL} & Block length \\ \hline
@ -695,19 +697,19 @@ Hardware registers (IFX) occupy the address space at \Address{0xFFxx} in the Dat
\multicolumn{3}{|l|}{\textit{Accelerator}} \\ \hline \multicolumn{3}{|l|}{\textit{Accelerator}} \\ \hline
\Address{0xFFD1} & \Register{FORMAT} & Accelerator sample format \\ \hline \Address{0xFFD1} & \Register{FORMAT} & Accelerator sample format \\ \hline
\Address{0xFFD2} & \Register{ACUNK1} & Unknown, usually 3 \\ \hline \Address{0xFFD2} & \Register{ACUNK1} & Unknown, usually 3 \\ \hline
\Address{0xFFD3} & \Register{ACDATA1} & Alternative ARAM interface \\ \hline \Address{0xFFD3} & \Register{ACDRAW} & Accelerator raw data \\ \hline
\Address{0xFFD4} & \Register{ACSAH} & Accelerator start address H \\ \hline \Address{0xFFD4} & \Register{ACSAH} & Accelerator start address H \\ \hline
\Address{0xFFD5} & \Register{ACSAL} & Accelerator start address L \\ \hline \Address{0xFFD5} & \Register{ACSAL} & Accelerator start address L \\ \hline
\Address{0xFFD6} & \Register{ACEAH} & Accelerator end address H \\ \hline \Address{0xFFD6} & \Register{ACEAH} & Accelerator end address H \\ \hline
\Address{0xFFD7} & \Register{ACEAL} & Accelerator end address L \\ \hline \Address{0xFFD7} & \Register{ACEAL} & Accelerator end address L \\ \hline
\Address{0xFFD8} & \Register{ACCAH} & Accelerator current address H \\ \hline \Address{0xFFD8} & \Register{ACCAH} & Accelerator current address H \\ \hline
\Address{0xFFD9} & \Register{ACCAL} & Accelerator current address L \\ \hline \Address{0xFFD9} & \Register{ACCAL} & Accelerator current address L \\ \hline
\Address{0xFFDA} & \Register{SCALE} & ADPCM predictor and scale \\ \hline \Address{0xFFDA} & \Register{PRED\_SCALE} & ADPCM predictor and scale \\ \hline
\Address{0xFFDB} & \Register{YN1} & ADPCM YN1 \\ \hline \Address{0xFFDB} & \Register{YN1} & ADPCM output history Y[N - 1] \\ \hline
\Address{0xFFDC} & \Register{YN2} & ADPCM YN2 \\ \hline \Address{0xFFDC} & \Register{YN2} & ADPCM output history Y[N - 2] \\ \hline
\Address{0xFFDD} & \Register{ACDAT} & Accelerator data \\ \hline \Address{0xFFDD} & \Register{ACDSAMP} & Accelerator processed sample \\ \hline
\Address{0xFFDE} & \Register{GAIN} & Gain \\ \hline \Address{0xFFDE} & \Register{GAIN} & Gain \\ \hline
\Address{0xFFDF} & \Register{ACUNK2} & Unknown, usually \Value{0x0C} \\ \hline \Address{0xFFDF} & \Register{ACIN} & Accelerator input \\ \hline
\Address{0xFFEF} & \Register{AMDM} & ARAM DMA Request Mask \\ \hline \Address{0xFFEF} & \Register{AMDM} & ARAM DMA Request Mask \\ \hline
\multicolumn{3}{|l|}{\textit{Interrupts}} \\ \hline \multicolumn{3}{|l|}{\textit{Interrupts}} \\ \hline
\Address{0xFFFB} & \Register{DIRQ} & IRQ request \\ \hline \Address{0xFFFB} & \Register{DIRQ} & IRQ request \\ \hline
@ -762,20 +764,29 @@ The GameCube DSP is connected to the memory bus through a DMA channel. DMA can b
\section{Accelerator} \section{Accelerator}
The accelerator is used to transfer data from accelerator memory (ARAM) to DSP memory. The accelerator area can be marked with \Register{ACSA} (start) and \Register{ACEA} (end) addresses. The accelerator is used to transfer data from accelerator memory (ARAM) to DSP memory. The accelerator area can be marked with \Register{ACSA} (start) and \Register{ACEA} (end) addresses.
Current address for the accelerator can be set or read from the \Register{ACCA} register. Reading from accelerator memory is done by reading from the \Register{ACDAT} register. Current address for the accelerator can be set or read from the \Register{ACCA} register. Accessing accelerator memory is done by reading or writing the \Register{ACDRAW} register for raw data, or reading the \Register{ACDSAMP} register for processed sample data.
This register contains data from ARAM pointed to by the \Register{ACCA} register. These registers contain raw or processed sample data from ARAM pointed to by the \Register{ACCA} register.
After reading the data, \Register{ACCA} is incremented by one. After reading the data, \Register{ACCA} is incremented by one.
After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA}, it gets reset to a value from \Register{ACSA} and the \Exception{ACCOV} interrupt is generated. After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA}, it gets reset to a value from \Register{ACSA} and an exception is generated. Raw reads generate exception \Exception{ACRROV}, raw writes generate exception \Exception{ACRWOV}, and sample reads generate exception \Exception{ACSOV}.
\RegisterBitOverview{0xFFD1}{FORMAT}{Accelerator sample format}{dddd dddd dddd dddd} \RegisterBitOverview{0xFFD1}{FORMAT}{Accelerator sample format}{---- ---- --gg ddss}
\begin{RegisterBitDescriptions} \begin{RegisterBitDescriptions}
\RegisterBitDescription{15--0}{d}{R/W}{\begin{tabular}[c]{@{}l@{}} \RegisterBitDescription{5--4}{g}{R/W}{\begin{tabular}[c]{@{}l@{}}
\Value{0x00} - ADPCM audio \\ \Value{0} - PCM gain/coef scaling = 1/2048 \\
\Value{0x05} - u8 reads (D3) \\ \Value{1} - PCM gain/coef scaling = 1/1 \\
\Value{0x06} - u16 reads (D3) \\ \Value{2} - PCM gain/coef scaling = 1/65536 \\
\Value{0x0A} - 16-bit PCM audio, u16 writes (D3) \\ \end{tabular}}
\Value{0x19} - 8-bit PCM audio \RegisterBitDescription{3--2}{d}{R/W}{\begin{tabular}[c]{@{}l@{}}
\Value{0} - ADPCM decoding from ARAM \\
\Value{1} - PCM decoding from \Register{ACIN}, \Register{ACCA} doesn't increment \\
\Value{2} - PCM decoding from ARAM \\
\Value{3} - PCM decoding from \Register{ACIN}, \Register{ACCA} increments \\
\end{tabular}}
\RegisterBitDescription{1--0}{s}{R/W}{\begin{tabular}[c]{@{}l@{}}
\Value{0} - 4-bit \\
\Value{1} - 8-bit \\
\Value{2} - 16-bit \\
\end{tabular}} \end{tabular}}
\end{RegisterBitDescriptions} \end{RegisterBitDescriptions}
@ -785,10 +796,10 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
\RegisterBitDescription{15--0}{d}{R/W}{Usually 3} \RegisterBitDescription{15--0}{d}{R/W}{Usually 3}
\end{RegisterBitDescriptions} \end{RegisterBitDescriptions}
\RegisterBitOverview{0xFFD3}{ACDATA1}{Alternative ARAM interface}{dddd dddd dddd dddd} \RegisterBitOverview{0xFFD3}{ACDRAW}{Raw ARAM Access}{dddd dddd dddd dddd}
\begin{RegisterBitDescriptions} \begin{RegisterBitDescriptions}
\RegisterBitDescription{15--0}{d}{R/W}{Reads from or writes to data pointed to by current accelerator address, and then increments the current address. It is unclear whether this respects the start and end addresses.} \RegisterBitDescription{15--0}{d}{R/W}{Reads from or writes to raw data pointed to by current accelerator address, and then increments the current address. Reads respect the FORMAT size. Writes are always 16-bit and treat the addresses as such. Writes require that the uppermost bit of the current address is set. Reads that overflow the end address throw exception \Exception{ACRROV}. Writes that overflow throw exception \Exception{ACRWOV}.}
\end{RegisterBitDescriptions} \end{RegisterBitDescriptions}
\RegisterBitOverview{0xFFD4}{ACSAH}{Accelerator Start Address H}{dddd dddd dddd dddd} \RegisterBitOverview{0xFFD4}{ACSAH}{Accelerator Start Address H}{dddd dddd dddd dddd}
@ -829,7 +840,7 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
\RegisterBitDescription{15--0}{d}{R/W}{Bits 15--0 of the accelerator current address} \RegisterBitDescription{15--0}{d}{R/W}{Bits 15--0 of the accelerator current address}
\end{RegisterBitDescriptions} \end{RegisterBitDescriptions}
\RegisterBitOverview{0xFFDA}{SCALE}{ADPCM predictor and scale}{---- ---- -ppp ssss} \RegisterBitOverview{0xFFDA}{PRED\_SCALE}{ADPCM predictor and scale}{---- ---- -ppp ssss}
\begin{RegisterBitDescriptions} \begin{RegisterBitDescriptions}
\RegisterBitDescription{6--4}{p}{R/W}{Used to decide which pair of coefficients to use (\Register{COEF\_A1\_p} and \Register{COEF\_A2\_p}, at $\Address{0xFFA0} + 2p$ and $\Address{0xFFA0} + 2p + 1$)} \RegisterBitDescription{6--4}{p}{R/W}{Used to decide which pair of coefficients to use (\Register{COEF\_A1\_p} and \Register{COEF\_A2\_p}, at $\Address{0xFFA0} + 2p$ and $\Address{0xFFA0} + 2p + 1$)}
@ -841,31 +852,31 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
\RegisterBitOverview{0xFFDB}{YN1}{ADPCM YN1}{dddd dddd dddd dddd} \RegisterBitOverview{0xFFDB}{YN1}{ADPCM YN1}{dddd dddd dddd dddd}
\begin{RegisterBitDescriptions} \begin{RegisterBitDescriptions}
\RegisterBitDescription{15--0}{d}{R/W}{Last value read by the accelerator, updated to the new value of \Register{ACDAT} when \Register{ACDAT} is read. Used when calculating ADPCM, but updated for all sample formats.} \RegisterBitDescription{15--0}{d}{R/W}{Last value read by the accelerator, updated to the new value of \Register{ACDSAMP} when \Register{ACDSAMP} is read. Used and updated for all sample formats. Multiplied by the A1 coefficient selected by PRED\_SCALE and scaled per FORMAT.}
\end{RegisterBitDescriptions} \end{RegisterBitDescriptions}
\RegisterBitOverview{0xFFDC}{YN1}{ADPCM YN2}{dddd dddd dddd dddd} \RegisterBitOverview{0xFFDC}{YN2}{ADPCM YN2}{dddd dddd dddd dddd}
\begin{RegisterBitDescriptions} \begin{RegisterBitDescriptions}
\RegisterBitDescription{15--0}{d}{R/W}{Second-last value read by the accelerator, updated to the previous value of \Register{YN1} when \Register{ACDAT} is read. Used when calculating ADPCM, but updated for all sample formats. Writing this value starts the accelerator.} \RegisterBitDescription{15--0}{d}{R/W}{Second-last value read by the accelerator, updated to the previous value of \Register{YN1} when \Register{ACDSAMP} is read. Used and updated for all sample formats. Multiplied by the A2 coefficient selected by PRED\_SCALE and scaled per FORMAT. Writing this value starts the accelerator.}
\end{RegisterBitDescriptions} \end{RegisterBitDescriptions}
\RegisterBitOverview{0xFFDD}{ACDAT}{Accelerator data}{dddd dddd dddd dddd} \RegisterBitOverview{0xFFDD}{ACDSAMP}{Accelerator data}{dddd dddd dddd dddd}
\begin{RegisterBitDescriptions} \begin{RegisterBitDescriptions}
\RegisterBitDescription{15--0}{d}{R}{Reads new data from the accelerator. When there is no data left, returns 0.} \RegisterBitDescription{15--0}{d}{R}{Reads new processed sample data from the accelerator. Data is processed per FORMAT. When there is no data left, returns 0.}
\end{RegisterBitDescriptions} \end{RegisterBitDescriptions}
\RegisterBitOverview{0xFFDE}{GAIN}{Gain}{dddd dddd dddd dddd} \RegisterBitOverview{0xFFDE}{GAIN}{Gain}{dddd dddd dddd dddd}
\begin{RegisterBitDescriptions} \begin{RegisterBitDescriptions}
\RegisterBitDescription{15--0}{d}{R/W}{Exact behavior unknown} \RegisterBitDescription{15--0}{d}{R/W}{Applied in PCM FORMATs. Raw sample is multiplied by GAIN, then scaled per the gain scale bits of FORMAT.}
\end{RegisterBitDescriptions} \end{RegisterBitDescriptions}
\RegisterBitOverview{0xFFDF}{ACUNK2}{Unknown 2}{dddd dddd dddd dddd} \RegisterBitOverview{0xFFDF}{ACIN}{Accelerator Input}{dddd dddd dddd dddd}
\begin{RegisterBitDescriptions} \begin{RegisterBitDescriptions}
\RegisterBitDescription{15--0}{d}{R/W}{Usually \Value{0x0C}} \RegisterBitDescription{15--0}{d}{R/W}{Used as the sample input in place of ARAM reads when FORMAT specifies it.}
\end{RegisterBitDescriptions} \end{RegisterBitDescriptions}
\RegisterBitOverview{0xFFEF}{AMDM}{ARAM DMA Request Mask}{---- ---- ---- ---m} \RegisterBitOverview{0xFFEF}{AMDM}{ARAM DMA Request Mask}{---- ---- ---- ---m}
@ -1006,7 +1017,7 @@ Functions used for describing opcode operation.
\begin{description} \begin{description}
\item \Function{PUSH\_STACK(\$stR)} \item \Function{PUSH\_STACK(\$stR)}
\begin{description} \begin{description}
\item \textbf{Description:} \\ \item \textbf{Description:} \\
Pushes value onto given stack referenced by stack register \Register{\$stR}. Operation moves values down in internal stack. Pushes value onto given stack referenced by stack register \Register{\$stR}. Operation moves values down in internal stack.
\item \textbf{Operation:} \\ \item \textbf{Operation:} \\
@ -1017,7 +1028,7 @@ Functions used for describing opcode operation.
\begin{description} \begin{description}
\item \Function{POP\_STACK(\$stR)} \item \Function{POP\_STACK(\$stR)}
\begin{description} \begin{description}
\item \textbf{Description:} \\ \item \textbf{Description:} \\
Pops value from stack referenced by stack register \Register{\$stR}. Operation moves values up in internal stack. Pops value from stack referenced by stack register \Register{\$stR}. Operation moves values up in internal stack.
\item \textbf{Operation:} \\ \item \textbf{Operation:} \\
@ -1358,7 +1369,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
\end{DSPOpcodeFormat} \end{DSPOpcodeFormat}
\begin{DSPOpcodeDescription} \begin{DSPOpcodeDescription}
\item Logic AND middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ax(1-D)}.m. \item Logic AND middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ac(1-D).m}.
\end{DSPOpcodeDescription} \end{DSPOpcodeDescription}
\begin{DSPOpcodeOperation} \begin{DSPOpcodeOperation}
@ -3640,7 +3651,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
\end{DSPOpcodeFormat} \end{DSPOpcodeFormat}
\begin{DSPOpcodeDescription} \begin{DSPOpcodeDescription}
\item Logic OR middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ax(1-D).m}. \item Logic OR middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ac(1-D).m}.
\end{DSPOpcodeDescription} \end{DSPOpcodeDescription}
\begin{DSPOpcodeOperation} \begin{DSPOpcodeOperation}