Merge pull request #10766 from xperia64/dsp_format_fun

DSP accelerator improvements
2025-06-28 01:49:33 -06:00 · 2025-05-09 19:04:23 -04:00
parent 463bd86145 a6290caa2e
commit 0351fd56b1
18 changed files with 567 additions and 275 deletions
--- a/Source/Core/Core/DSP/DSPAccelerator.cpp
+++ b/Source/Core/Core/DSP/DSPAccelerator.cpp
@ -11,86 +11,152 @@

 namespace DSP
 {
-u16 Accelerator::ReadD3()
+u16 Accelerator::GetCurrentSample()
 {
  u16 val = 0;
-
-  switch (m_sample_format)
+  // The lower two bits of the sample format indicate the access size
+  switch (m_sample_format.size)
  {
-  case 0x5:  // u8 reads
+  case FormatSize::Size4Bit:
+    val = ReadMemory(m_current_address >> 1);
+    if (m_current_address & 1)
+      val &= 0xf;
+    else
+      val >>= 4;
+    break;
+  case FormatSize::Size8Bit:
    val = ReadMemory(m_current_address);
-    m_current_address++;
    break;
-  case 0x6:  // u16 reads
+  case FormatSize::Size16Bit:
    val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
-    m_current_address++;
    break;
-  default:
-    ERROR_LOG_FMT(DSPLLE, "dsp_read_aram_d3() - unknown format {:#x}", m_sample_format);
+  default:  // produces garbage, but affects the current address
+    ERROR_LOG_FMT(DSPLLE, "GetCurrentSample() - bad format {:#x}", m_sample_format.hex);
    break;
  }
-
-  if (m_current_address >= m_end_address)
-  {
-    // Set address back to start address. (never seen this here!)
-    m_current_address = m_start_address;
-  }
  return val;
 }

-void Accelerator::WriteD3(u16 value)
+u16 Accelerator::ReadRaw()
+{
+  u16 val = GetCurrentSample();
+  if (m_sample_format.size != FormatSize::SizeInvalid)
+  {
+    m_current_address++;
+  }
+  else
+  {
+    m_current_address = (m_current_address & ~3) | ((m_current_address + 1) & 3);
+  }
+
+  // There are edge cases that are currently not handled here in u4 and u8 mode
+  // In u8 mode, if ea & 1 == 0 and ca == ea + 1, the accelerator can be read twice. Upon the second
+  // read, the data returned appears to be the other half of the u16 from the first read, and the
+  // DSP resets the current address and throws exception 3
+
+  // During these reads, ca is not incremented normally.
+
+  // Instead, incrementing ca works like this: ca = (ca & ~ 3) | ((ca + 1) & 3)
+  // u4 mode extends this further.
+
+  // When ea & 3 == 0, and ca in [ea + 1, ea + 3], the accelerator can be read (4 - (ca - ea - 1))
+  // times. On the last read, the data returned appears to be the remaining nibble of the u16 from
+  // the first read, and the DSP resets the current address and throws exception 3
+
+  // When ea & 3 == 1, and ca in [ea + 1, ea + 2], the accelerator can be read (4 - (ca - ea - 1))
+  // times. On the last read, the data returned appears to be the remaining nibble of the u16 from
+  // the first read, and the DSP resets the current address and throws exception 3
+
+  // When ea & 3 == 2, and ca == ea + 1, the accelerator can be read 4 times. On the last read, the
+  // data returned appears to be the remaining nibble of the u16 from the first read, and the DSP
+  // resets the current address and throws exception 3
+
+  // There are extra extra edge cases if ca, ea, and potentially other registers are adjusted during
+  // this pre-reset phase
+
+  // The cleanest way to emulate the normal non-edge behavior is to only reset things if we just
+  // read the end address. If the current address is larger than the end address (and not in the
+  // edge range), it ignores the end address
+  if (m_current_address - 1 == m_end_address)
+  {
+    // Set address back to start address (confirmed on hardware)
+    m_current_address = m_start_address;
+    OnRawReadEndException();
+  }
+
+  SetCurrentAddress(m_current_address);
+  return val;
+}
+
+void Accelerator::WriteRaw(u16 value)
 {
  // Zelda ucode writes a bunch of zeros to ARAM through d3 during
  // initialization.  Don't know if it ever does it later, too.
  // Pikmin 2 Wii writes non-stop to 0x10008000-0x1000801f (non-zero values too)
  // Zelda TP Wii writes non-stop to 0x10000000-0x1000001f (non-zero values too)

-  switch (m_sample_format)
+  // Writes only seem to be accepted when the upper most bit of the address is set
+  if (m_current_address & 0x80000000)
  {
-  case 0xA:  // u16 writes
+    // The format doesn't matter for raw writes; all writes are u16 and the address is treated as if
+    // we are in a 16-bit format
    WriteMemory(m_current_address * 2, value >> 8);
    WriteMemory(m_current_address * 2 + 1, value & 0xFF);
    m_current_address++;
-    break;
-  default:
-    ERROR_LOG_FMT(DSPLLE, "dsp_write_aram_d3() - unknown format {:#x}", m_sample_format);
-    break;
+    OnRawWriteEndException();
+  }
+  else
+  {
+    ERROR_LOG_FMT(DSPLLE, "WriteRaw() - tried to write to address {:#x} without high bit set",
+                  m_current_address);
  }
 }

-u16 Accelerator::Read(const s16* coefs)
+u16 Accelerator::ReadSample(const s16* coefs)
 {
  if (m_reads_stopped)
    return 0x0000;

-  u16 val;
-  u8 step_size_bytes = 0;
+  if (m_sample_format.unk != 0)
+  {
+    WARN_LOG_FMT(DSPLLE, "ReadSample() format {:#x} has unknown upper bits set",
+                 m_sample_format.hex);
+  }

-  // let's do the "hardware" decode DSP_FORMAT is interesting - the Zelda
-  // ucode seems to indicate that the bottom two bits specify the "read size"
-  // and the address multiplier.  The bits above that may be things like sign
-  // extension and do/do not use ADPCM.  It also remains to be figured out
-  // whether there's a difference between the usual accelerator "read
-  // address" and 0xd3.
-  switch (m_sample_format)
+  u16 val = 0;
+  u8 step_size = 0;
+  s16 raw_sample;
+  if (m_sample_format.decode == FormatDecode::MMIOPCMNoInc ||
+      m_sample_format.decode == FormatDecode::MMIOPCMInc)
  {
-  case 0x00:  // ADPCM audio
+    // The addresses can be complete nonsense in either of these modes
+    raw_sample = m_input;
+  }
+  else
  {
-    int scale = 1 << (m_pred_scale & 0xF);
+    raw_sample = GetCurrentSample();
+  }
+
  int coef_idx = (m_pred_scale >> 4) & 0x7;

  s32 coef1 = coefs[coef_idx * 2 + 0];
  s32 coef2 = coefs[coef_idx * 2 + 1];

-    int temp = (m_current_address & 1) ? (ReadMemory(m_current_address >> 1) & 0xF) :
-                                         (ReadMemory(m_current_address >> 1) >> 4);
+  switch (m_sample_format.decode)
+  {
+  case FormatDecode::ADPCM:  // ADPCM audio
+  {
+    // ADPCM really only supports 4-bit decoding, but for larger values on hardware, it just ignores
+    // the upper 12 bits
+    raw_sample &= 0xF;
+    int scale = 1 << (m_pred_scale & 0xF);

-    if (temp >= 8)
-      temp -= 16;
+    if (raw_sample >= 8)
+      raw_sample -= 16;

-    s32 val32 = (scale * temp) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11);
+    s32 val32 = (scale * raw_sample) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11);
    val = static_cast<s16>(std::clamp<s32>(val32, -0x7FFF, 0x7FFF));
-    step_size_bytes = 2;
+    step_size = 2;

    m_yn2 = m_yn1;
    m_yn1 = val;
@ -100,6 +166,7 @@ u16 Accelerator::Read(const s16* coefs)
    // the ACCOV exception does not fire at all, the predscale register is not updated,
    // and if the end address is 16-byte aligned, the DSP loops to start_address + 1
    // instead of start_address.
+    // TODO: This probably needs to be adjusted when using 8 or 16-bit accesses.
    if ((m_end_address & 0xf) == 0x0 && m_current_address == m_end_address)
    {
      m_current_address = m_start_address + 1;
@ -113,47 +180,54 @@ u16 Accelerator::Read(const s16* coefs)
    {
      m_pred_scale = ReadMemory((m_current_address & ~15) >> 1);
      m_current_address += 2;
-      step_size_bytes += 2;
+      step_size += 2;
    }
    break;
  }
-  case 0x0A:  // 16-bit PCM audio
-    val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
-    m_yn2 = m_yn1;
-    m_yn1 = val;
-    step_size_bytes = 2;
-    m_current_address += 1;
+  case FormatDecode::MMIOPCMNoInc:
+  case FormatDecode::PCM:  // 16-bit PCM audio
+  case FormatDecode::MMIOPCMInc:
+  {
+    // Gain seems to only apply for PCM decoding
+    u8 gain_shift = 0;
+    switch (m_sample_format.gain_scale)
+    {
+    case FormatGainScale::GainScale2048:
+      gain_shift = 11;  // x / 2048 = x >> 11
      break;
-  case 0x19:  // 8-bit PCM audio
-    val = ReadMemory(m_current_address) << 8;
-    m_yn2 = m_yn1;
-    m_yn1 = val;
-    step_size_bytes = 2;
-    m_current_address += 1;
+    case FormatGainScale::GainScale1:
+      gain_shift = 0;  // x / 1 = x >> 0
+      break;
+    case FormatGainScale::GainScale65536:
+      gain_shift = 16;  // x / 65536 = x >> 16
      break;
    default:
-    ERROR_LOG_FMT(DSPLLE, "dsp_read_accelerator() - unknown format {:#x}", m_sample_format);
-    step_size_bytes = 2;
-    m_current_address += 1;
-    val = 0;
+      ERROR_LOG_FMT(DSPLLE, "ReadSample() invalid gain mode in format {:#x}", m_sample_format.hex);
      break;
    }
-
-  // TODO: Take GAIN into account
-  // adpcm = 0, pcm8 = 0x100, pcm16 = 0x800
-  // games using pcm8 : Phoenix Wright Ace Attorney (WiiWare), Megaman 9-10 (WiiWare)
-  // games using pcm16: GC Sega games, ...
+    s32 val32 = ((static_cast<s32>(m_gain) * raw_sample) >> gain_shift) +
+                (((coef1 * m_yn1) >> gain_shift) + ((coef2 * m_yn2) >> gain_shift));
+    val = static_cast<s16>(val32);
+    m_yn2 = m_yn1;
+    m_yn1 = val;
+    step_size = 2;
+    if (m_sample_format.decode != FormatDecode::MMIOPCMNoInc)
+    {
+      m_current_address += 1;
+    }
+    break;
+  }
+  }

  // Check for loop.
-  // Somehow, YN1 and YN2 must be initialized with their "loop" values,
-  // so yeah, it seems likely that we should raise an exception to let
-  // the DSP program do that, at least if DSP_FORMAT == 0x0A.
-  if (m_current_address == (m_end_address + step_size_bytes - 1))
+  // YN1 and YN2 need to be initialized with their "loop" values,
+  // which is usually done upon this exception.
+  if (m_current_address == (m_end_address + step_size - 1))
  {
    // Set address back to start address.
    m_current_address = m_start_address;
    m_reads_stopped = true;
-    OnEndException();
+    OnSampleReadEndException();
  }

  SetCurrentAddress(m_current_address);
@ -192,7 +266,12 @@ void Accelerator::SetCurrentAddress(u32 address)

 void Accelerator::SetSampleFormat(u16 format)
 {
-  m_sample_format = format;
+  m_sample_format.hex = format;
+}
+
+void Accelerator::SetGain(s16 gain)
+{
+  m_gain = gain;
 }

 void Accelerator::SetYn1(s16 yn1)
@ -210,4 +289,10 @@ void Accelerator::SetPredScale(u16 pred_scale)
 {
  m_pred_scale = pred_scale & 0x7f;
 }
+
+void Accelerator::SetInput(u16 input)
+{
+  m_input = input;
+}
+
 }  // namespace DSP
--- a/Source/Core/Core/DSP/DSPAccelerator.h
+++ b/Source/Core/Core/DSP/DSPAccelerator.h
@ -3,6 +3,7 @@

 #pragma once

+#include "Common/BitField.h"
 #include "Common/CommonTypes.h"

 class PointerWrap;
@ -14,41 +15,85 @@ class Accelerator
 public:
  virtual ~Accelerator() = default;

-  u16 Read(const s16* coefs);
+  u16 ReadSample(const s16* coefs);
  // Zelda ucode reads ARAM through 0xffd3.
-  u16 ReadD3();
-  void WriteD3(u16 value);
+  u16 ReadRaw();
+  void WriteRaw(u16 value);

  u32 GetStartAddress() const { return m_start_address; }
  u32 GetEndAddress() const { return m_end_address; }
  u32 GetCurrentAddress() const { return m_current_address; }
-  u16 GetSampleFormat() const { return m_sample_format; }
+  u16 GetSampleFormat() const { return m_sample_format.hex; }
+  s16 GetGain() const { return m_gain; }
  s16 GetYn1() const { return m_yn1; }
  s16 GetYn2() const { return m_yn2; }
  u16 GetPredScale() const { return m_pred_scale; }
+  u16 GetInput() const { return m_input; }
  void SetStartAddress(u32 address);
  void SetEndAddress(u32 address);
  void SetCurrentAddress(u32 address);
  void SetSampleFormat(u16 format);
+  void SetGain(s16 gain);
  void SetYn1(s16 yn1);
  void SetYn2(s16 yn2);
  void SetPredScale(u16 pred_scale);
+  void SetInput(u16 input);

  void DoState(PointerWrap& p);

 protected:
-  virtual void OnEndException() = 0;
+  virtual void OnRawReadEndException() = 0;
+  virtual void OnRawWriteEndException() = 0;
+  virtual void OnSampleReadEndException() = 0;
  virtual u8 ReadMemory(u32 address) = 0;
  virtual void WriteMemory(u32 address, u8 value) = 0;
+  u16 GetCurrentSample();

  // DSP accelerator registers.
  u32 m_start_address = 0;
  u32 m_end_address = 0;
  u32 m_current_address = 0;
-  u16 m_sample_format = 0;
+
+  enum class FormatSize : u16
+  {
+    Size4Bit = 0,
+    Size8Bit = 1,
+    Size16Bit = 2,
+    SizeInvalid = 3
+  };
+
+  enum class FormatDecode : u16
+  {
+    ADPCM = 0,         // ADPCM reads from ARAM, ACCA increments
+    MMIOPCMNoInc = 1,  // PCM Reads from ACIN, ACCA doesn't increment
+    PCM = 2,           // PCM reads from ARAM, ACCA increments
+    MMIOPCMInc = 3     // PCM reads from ACIN, ACCA increments
+  };
+
+  // When reading samples (at least in PCM mode), they are multiplied by the gain, then divided by
+  // the value specified here
+  enum class FormatGainScale : u16
+  {
+    GainScale2048 = 0,
+    GainScale1 = 1,
+    GainScale65536 = 2,
+    GainScaleInvalid = 3
+  };
+
+  union SampleFormat
+  {
+    u16 hex;
+    BitField<0, 2, FormatSize> size;
+    BitField<2, 2, FormatDecode> decode;
+    BitField<4, 2, FormatGainScale> gain_scale;
+    BitField<6, 10, u16> unk;
+  } m_sample_format{0};
+
+  s16 m_gain = 0;
  s16 m_yn1 = 0;
  s16 m_yn2 = 0;
  u16 m_pred_scale = 0;
+  u16 m_input = 0;

  // When an ACCOV is triggered, the accelerator stops reading back anything
  // and updating the current address register, unless the YN2 register is written to.
--- a/Source/Core/Core/DSP/DSPCore.cpp
+++ b/Source/Core/Core/DSP/DSPCore.cpp
@ -109,7 +109,18 @@ public:
 protected:
  u8 ReadMemory(u32 address) override { return Host::ReadHostMemory(address); }
  void WriteMemory(u32 address, u8 value) override { Host::WriteHostMemory(value, address); }
-  void OnEndException() override { m_dsp.SetException(ExceptionType::AcceleratorOverflow); }
+  void OnRawReadEndException() override
+  {
+    m_dsp.SetException(ExceptionType::AcceleratorRawReadOverflow);
+  }
+  void OnRawWriteEndException() override
+  {
+    m_dsp.SetException(ExceptionType::AcceleratorRawWriteOverflow);
+  }
+  void OnSampleReadEndException() override
+  {
+    m_dsp.SetException(ExceptionType::AcceleratorSampleReadOverflow);
+  }

 private:
  SDSP& m_dsp;
--- a/Source/Core/Core/DSP/DSPCore.h
+++ b/Source/Core/Core/DSP/DSPCore.h
@ -154,7 +154,7 @@ enum : u32

  DSP_FORMAT = 0xd1,  // Sample format
  DSP_ACUNK = 0xd2,   // Set to 3 on my dumps
-  DSP_ACDATA1 = 0xd3,  // Used only by Zelda ucodes
+  DSP_ACDRAW = 0xd3,  // Raw accelerator accesses
  DSP_ACSAH = 0xd4,   // Start of loop
  DSP_ACSAL = 0xd5,
  DSP_ACEAH = 0xd6,  // End of sample (and loop)
@ -164,9 +164,9 @@ enum : u32
  DSP_PRED_SCALE = 0xda,  // ADPCM predictor and scale
  DSP_YN1 = 0xdb,
  DSP_YN2 = 0xdc,
-  DSP_ACCELERATOR = 0xdd,  // ADPCM accelerator read. Used by AX.
+  DSP_ACDSAMP = 0xdd,  // Accelerator sample reads, processed differently depending on FORMAT
  DSP_GAIN = 0xde,
-  DSP_ACUNK2 = 0xdf,  // Set to 0xc on my dumps
+  DSP_ACIN = 0xdf,  // Feeds PCM samples written here

  DSP_AMDM = 0xef,  // ARAM DMA Request Mask 0: DMA with ARAM unmasked 1: masked

@ -228,9 +228,9 @@ enum class ExceptionType
 {
  StackOverflow = 1,                  // 0x0002 stack under/over flow
  EXP_2 = 2,                          // 0x0004
-  EXP_3 = 3,                // 0x0006
-  EXP_4 = 4,                // 0x0008
-  AcceleratorOverflow = 5,  // 0x000a accelerator address overflow
+  AcceleratorRawReadOverflow = 3,     // 0x0006 accelerator raw read address overflow
+  AcceleratorRawWriteOverflow = 4,    // 0x0008 accelerator raw write address overflow
+  AcceleratorSampleReadOverflow = 5,  // 0x000a accelerator sample reads address overflow
  EXP_6 = 6,                          // 0x000c
  ExternalInterrupt = 7               // 0x000e external int (message from CPU)
 };
--- a/Source/Core/Core/DSP/DSPHWInterface.cpp
+++ b/Source/Core/Core/DSP/DSPHWInterface.cpp
@ -122,12 +122,6 @@ void SDSP::WriteIFX(u32 address, u16 value)
    m_ifx_regs[DSP_DSBL] = 0;
    break;

-  case DSP_GAIN:
-    if (value != 0)
-    {
-      DEBUG_LOG_FMT(DSPLLE, "Gain Written: {:#06x}", value);
-    }
-    [[fallthrough]];
  case DSP_DSPA:
  case DSP_DSMAH:
  case DSP_DSMAL:
@ -161,6 +155,9 @@ void SDSP::WriteIFX(u32 address, u16 value)
  case DSP_FORMAT:
    m_accelerator->SetSampleFormat(value);
    break;
+  case DSP_GAIN:
+    m_accelerator->SetGain(value);
+    break;
  case DSP_YN1:
    m_accelerator->SetYn1(value);
    break;
@ -170,8 +167,11 @@ void SDSP::WriteIFX(u32 address, u16 value)
  case DSP_PRED_SCALE:
    m_accelerator->SetPredScale(value);
    break;
-  case DSP_ACDATA1:  // Accelerator write (Zelda type) - "UnkZelda"
-    m_accelerator->WriteD3(value);
+  case DSP_ACDRAW:  // Raw accelerator write
+    m_accelerator->WriteRaw(value);
+    break;
+  case DSP_ACIN:
+    m_accelerator->SetInput(value);
    break;

  default:
@ -231,16 +231,20 @@ u16 SDSP::ReadIFXImpl(u16 address)
    return static_cast<u16>(m_accelerator->GetCurrentAddress());
  case DSP_FORMAT:
    return m_accelerator->GetSampleFormat();
+  case DSP_GAIN:
+    return m_accelerator->GetGain();
  case DSP_YN1:
    return m_accelerator->GetYn1();
  case DSP_YN2:
    return m_accelerator->GetYn2();
  case DSP_PRED_SCALE:
    return m_accelerator->GetPredScale();
-  case DSP_ACCELERATOR:  // ADPCM Accelerator reads
-    return m_accelerator->Read(reinterpret_cast<s16*>(&m_ifx_regs[DSP_COEF_A1_0]));
-  case DSP_ACDATA1:  // Accelerator reads (Zelda type) - "UnkZelda"
-    return m_accelerator->ReadD3();
+  case DSP_ACDSAMP:  // Processed sample accelerator read
+    return m_accelerator->ReadSample(reinterpret_cast<s16*>(&m_ifx_regs[DSP_COEF_A1_0]));
+  case DSP_ACDRAW:  // Raw accelerator read
+    return m_accelerator->ReadRaw();
+  case DSP_ACIN:
+    return m_accelerator->GetInput();

  default:
  {
--- a/Source/Core/Core/DSP/DSPTables.cpp
+++ b/Source/Core/Core/DSP/DSPTables.cpp
@ -400,21 +400,21 @@ const std::array<pdlabel_t, 96> pdlabels =
  {0xffcf, "DSMAL", "DSP DMA Mem Address L",},

  {0xffd0, "0xffd0",nullptr,},
-  {0xffd1, "SampleFormat", "SampleFormat",},
+  {0xffd1, "FORMAT", "Accelerator sample format",},
  {0xffd2, "0xffd2",nullptr,},
-  {0xffd3, "UnkZelda", "Unk Zelda reads/writes from/to it",},
+  {0xffd3, "ACDRAW", "Accelerator raw read/write from ARAM",},
  {0xffd4, "ACSAH", "Accelerator start address H",},
  {0xffd5, "ACSAL", "Accelerator start address L",},
  {0xffd6, "ACEAH", "Accelerator end address H",},
  {0xffd7, "ACEAL", "Accelerator end address L",},
  {0xffd8, "ACCAH", "Accelerator current address H",},
  {0xffd9, "ACCAL", "Accelerator current address L",},
-  {0xffda, "pred_scale", "pred_scale",},
-  {0xffdb, "yn1", "yn1",},
-  {0xffdc, "yn2", "yn2",},
-  {0xffdd, "ARAM", "Direct Read from ARAM (uses ADPCM)",},
+  {0xffda, "PRED_SCALE", "ADPCM predictor and scale",},
+  {0xffdb, "YN1", "ADPCM output history Y[N - 1]",},
+  {0xffdc, "YN2", "ADPCM output history Y[N - 2]",},
+  {0xffdd, "ACDSAMP", "Accelerator processed sample read from ARAM or ACIN",},
  {0xffde, "GAIN", "Gain",},
-  {0xffdf, "0xffdf", nullptr,},
+  {0xffdf, "ACIN", "Accelerator MMIO PCM input value",},

  {0xffe0, "0xffe0",nullptr,},
  {0xffe1, "0xffe1",nullptr,},
--- a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp
+++ b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.cpp
@ -246,7 +246,7 @@ AESndAccelerator::AESndAccelerator(DSPManager& dsp) : m_dsp(dsp)

 AESndAccelerator::~AESndAccelerator() = default;

-void AESndAccelerator::OnEndException()
+void AESndAccelerator::OnSampleReadEndException()
 {
  // exception5 - this updates internal state
  SetYn1(GetYn1());
@ -266,12 +266,11 @@ void AESndAccelerator::WriteMemory(u32 address, u8 value)

 static constexpr std::array<s16, 16> ACCELERATOR_COEFS = {};  // all zeros

-void AESndUCode::SetUpAccelerator(u16 format, [[maybe_unused]] u16 gain)
+void AESndUCode::SetUpAccelerator(u16 format, u16 gain)
 {
  // setup_accl
  m_accelerator.SetSampleFormat(format);
-  // not currently implemented, but it doesn't matter since the gain is configured to be a no-op
-  // m_accelerator.SetGain(gain);
+  m_accelerator.SetGain(gain);
  m_accelerator.SetStartAddress(m_parameter_block.buf_start);
  m_accelerator.SetEndAddress(m_parameter_block.buf_end);
  m_accelerator.SetCurrentAddress(m_parameter_block.buf_curr);
@ -372,7 +371,7 @@ void AESndUCode::DoMixing()
          while (counter_h >= 1)
          {
            counter_h--;
-            new_r = m_accelerator.Read(ACCELERATOR_COEFS.data());
+            new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
            new_l = new_r;
          }
          break;
@ -383,8 +382,8 @@ void AESndUCode::DoMixing()
          while (counter_h >= 1)
          {
            counter_h--;
-            new_r = m_accelerator.Read(ACCELERATOR_COEFS.data());
-            new_l = m_accelerator.Read(ACCELERATOR_COEFS.data());
+            new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
+            new_l = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
          }
          break;  // falls through to mix_samples normally

@ -394,7 +393,7 @@ void AESndUCode::DoMixing()
          while (counter_h >= 1)
          {
            counter_h--;
-            new_r = m_accelerator.Read(ACCELERATOR_COEFS.data());
+            new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
            new_l = new_r;
          }
          new_r ^= 0x8000;
@ -407,8 +406,8 @@ void AESndUCode::DoMixing()
          while (counter_h >= 1)
          {
            counter_h--;
-            new_r = m_accelerator.Read(ACCELERATOR_COEFS.data());
-            new_l = m_accelerator.Read(ACCELERATOR_COEFS.data());
+            new_r = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
+            new_l = m_accelerator.ReadSample(ACCELERATOR_COEFS.data());
          }
          new_r ^= 0x8000;
          new_l ^= 0x8000;
--- a/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h
+++ b/Source/Core/Core/HW/DSPHLE/UCodes/AESnd.h
@ -30,7 +30,9 @@ public:
  ~AESndAccelerator();

 protected:
-  void OnEndException() override;
+  void OnRawReadEndException() override {}
+  void OnRawWriteEndException() override {}
+  void OnSampleReadEndException() override;
  u8 ReadMemory(u32 address) override;
  void WriteMemory(u32 address, u8 value) override;

--- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
+++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h
@ -134,7 +134,9 @@ public:
  PB_TYPE* acc_pb = nullptr;

 protected:
-  void OnEndException() override
+  void OnRawReadEndException() override {}
+  void OnRawWriteEndException() override {}
+  void OnSampleReadEndException() override
  {
    if (acc_pb->audio_addr.looping)
    {
@ -181,6 +183,7 @@ void AcceleratorSetup(HLEAccelerator* accelerator, PB_TYPE* pb)
  accelerator->SetSampleFormat(pb->audio_addr.sample_format);
  accelerator->SetYn1(pb->adpcm.yn1);
  accelerator->SetYn2(pb->adpcm.yn2);
+  accelerator->SetGain(pb->adpcm.gain);
  accelerator->SetPredScale(pb->adpcm.pred_scale);
 }

@ -189,7 +192,7 @@ void AcceleratorSetup(HLEAccelerator* accelerator, PB_TYPE* pb)
 // by the accelerator on real hardware).
 u16 AcceleratorGetSample(HLEAccelerator* accelerator)
 {
-  return accelerator->Read(accelerator->acc_pb->adpcm.coefs);
+  return accelerator->ReadSample(accelerator->acc_pb->adpcm.coefs);
 }

 // Reads samples from the input callback, resamples them to <count> samples at
--- a/Source/DSPSpy/tests/accelerator_loop_test.ds
+++ b/Source/DSPSpy/tests/accelerator_loop_test.ds
@ -9,15 +9,9 @@ lri $AC0.L, #0x0000 ; start
 lri $AC1.M, #0x0000 ; end
 lri $AC1.L, #0x0011 ; end

-; Reset some registers
-lri $AC0.H, #0xffff
-sr @0xffda, $AC0.H ; pred scale
-sr @0xffdb, $AC0.H ; yn1
-sr @0xffdc, $AC0.H ; yn2
-
 ; Set the sample format
 lri $AC0.H, #0x0
-sr @0xffd1, $AC0.H
+sr @FORMAT, $AC0.H
 ; Set the starting and current address
 srs @ACSAH, $AC0.M
 srs @ACCAH, $AC0.M
@ -27,25 +21,32 @@ srs @ACCAL, $AC0.L
 srs @ACEAH, $AC1.M
 srs @ACEAL, $AC1.L

+; Reset some registers (these must be reset after setting FORMAT)
+lri $AC0.H, #0xffff
+sr @PRED_SCALE, $AC0.H
+sr @YN1, $AC0.H
+sr @YN2, $AC0.H
+
 call load_hw_reg_to_regs
 call send_back ; check the accelerator regs before a read

 bloopi #40, end_of_loop
-  lr $IX3, @ARAM
+  lr $IX3, @ACDSAMP
  call load_hw_reg_to_regs
  call send_back ; after a read
+  nop ; Loops that end at a return of a call are buggy on hw
 end_of_loop:
  nop

 jmp end_of_test

 load_hw_reg_to_regs:
-  lr $AR0, @0xffd1 ; format
+  lr $AR0, @FORMAT
  lr $AR1, @0xffd2 ; unknown
-  lr $AR2, @0xffda ; pred scale
-  lr $AR3, @0xffdb ; yn1
-  lr $IX0, @0xffdc ; yn2
-  lr $IX1, @0xffdf ; unknown accelerator register
+  lr $AR2, @PRED_SCALE
+  lr $AR3, @YN1
+  lr $IX0, @YN2
+  lr $IX1, @ACIN

  lri $AC0.H, #0
  lrs $AC0.M, @ACSAH
--- a/Source/DSPSpy/tests/accelerator_pcm_test.ds
+++ b/Source/DSPSpy/tests/accelerator_pcm_test.ds
@ -0,0 +1,79 @@
+incdir  "tests"
+include "dsp_base.inc"
+
+test_main:
+
+; Test parameters
+lri $AC0.M, #0x0000 ; start
+lri $AC0.L, #0x0000 ; start
+lri $AC1.M, #0x0000 ; end
+lri $AC1.L, #0x0011 ; end
+
+; Set the sample format
+lri $AC0.H, #0x08 ; 4-bit PCM, gain scaling = x / 2048
+sr @FORMAT, $AC0.H
+; Set the starting and current address
+srs @ACSAH, $AC0.M
+srs @ACCAH, $AC0.M
+srs @ACSAL, $AC0.L
+srs @ACCAL, $AC0.L
+; Set the ending address
+srs @ACEAH, $AC1.M
+srs @ACEAL, $AC1.L
+
+; Set the gains
+si @GAIN, #0x0800 ; 2048 / 2048 = 1.0
+si @COEF_A1_0, #0x0400 ; 1024 / 2048 = 0.5
+si @COEF_A2_0, #0x0200 ; 512 / 2048 = 0.25
+
+; Reset some registers (these must be reset after setting FORMAT)
+lri $AC0.H, #0x0000
+sr @PRED_SCALE, $AC0.H ; use 0th coefficients
+sr @YN1, $AC0.H
+sr @YN2, $AC0.H
+
+call load_hw_reg_to_regs
+call send_back ; check the accelerator regs before a read
+
+; Expected read sequence
+; r[0] = (data[0] >> 4) + (0/2) + (0/4)
+; r[1] = (data[0] & 0xf) + (r[0]/2) + (0/4)
+; r[2] = (data[1] >> 4) + (r[1]/2) + (r[0]/4)
+; r[3] = (data[1] & 0xf) + (r[2]/2) + (r[1]/4)
+; ...
+
+bloopi #40, end_of_loop
+  lr $IX3, @ACDSAMP
+  call load_hw_reg_to_regs
+  call send_back ; after a read
+  nop ; Loops that end at a return of a call are buggy on hw
+end_of_loop:
+  nop
+
+jmp end_of_test
+
+load_hw_reg_to_regs:
+  lr $AR0, @FORMAT
+  lr $AR1, @0xffd2 ; unknown
+  lr $AR2, @PRED_SCALE
+  lr $AR3, @YN1
+  lr $IX0, @YN2
+  lr $IX1, @ACIN
+
+  lri $AC0.H, #0
+  lrs $AC0.M, @ACSAH
+  lrs $AC0.L, @ACSAL
+
+  lri $AC1.H, #0
+  lrs $AC1.M, @ACEAH
+  lrs $AC1.L, @ACEAL
+
+  lrs $AX0.H, @ACCAH
+  lrs $AX0.L, @ACCAL
+  lrs $AX1.H, @ACCAH
+  lrs $AX1.L, @ACCAL
+
+  lrs $AX1.H, @ACCAH
+  lrs $AX1.L, @ACCAL
+
+  ret
--- a/Source/DSPSpy/tests/accelerator_raw_test.ds
+++ b/Source/DSPSpy/tests/accelerator_raw_test.ds
@ -0,0 +1,112 @@
+incdir  "tests"
+include "dsp_base.inc"
+
+; To use: set up a buffer in main_spy,
+; then modify the start, current, and ending addresses
+; and verify things look correct
+loop_read_test:
+  ; Set the sample format
+  sr @FORMAT, $AC0.H
+
+  ; Test parameters
+  lri $AC0.M, #0x0000 ; start
+  lri $AC0.L, #0x0000 ; start
+  lri $AC1.M, #0x0000 ; end
+  lri $AC1.L, #0x0011 ; end
+
+  ; pred scale, coefs, etc do not matter for raw
+
+  ; Set the starting and current address
+  srs @ACSAH, $AC0.M
+  srs @ACCAH, $AC0.M
+  srs @ACSAL, $AC0.L
+  srs @ACCAL, $AC0.L
+  ; Set the ending address
+  srs @ACEAH, $AC1.M
+  srs @ACEAL, $AC1.L
+
+  call load_hw_reg_to_regs
+  call send_back ; check the accelerator regs before a read
+
+  bloopi #4, end_of_read_loop
+    lr $IX3, @ACDRAW ; Raw reads
+    call load_hw_reg_to_regs
+    call send_back ; after a read
+    nop
+end_of_read_loop:
+    nop
+    ret
+
+loop_write_test:
+  ; Set the sample format
+  sr @FORMAT, $AC0.H
+
+  ; Test parameters
+  lri $AC0.M, #0x0000 ; start
+  lri $AC0.L, #0x0000 ; start
+  lri $AC1.M, #0x0000 ; end
+  lri $AC1.L, #0x0011 ; end
+
+  ; pred scale, coefs, etc do not matter for raw
+
+  ; Set the starting and current address
+  srs @ACSAH, $AC0.M
+  srs @ACCAH, $AC0.M
+  srs @ACSAL, $AC0.L
+  srs @ACCAL, $AC0.L
+  ; Set the ending address
+  srs @ACEAH, $AC1.M
+  srs @ACEAL, $AC1.L
+
+  call load_hw_reg_to_regs
+  call send_back ; check the accelerator regs before a write
+
+  bloopi #4, end_of_write_loop
+    sr @ACDRAW, $IX3 ; Raw writes
+    call load_hw_reg_to_regs
+    call send_back ; after a write
+    nop
+end_of_write_loop:
+    nop
+    ret
+
+test_main:
+lri $AC0.H, #0x00 ; 4-bit
+call loop_read_test
+lri $AC0.H, #0x01 ; 8-bit
+call loop_read_test
+lri $AC0.H, #0x02 ; 16-bit
+call loop_read_test
+lri $AC0.H, #0x00 ; "4-bit", but all writes are 16-bits
+call loop_write_test
+lri $AC0.H, #0x01 ; "8-bit", but all writes are 16-bits
+call loop_write_test
+lri $AC0.H, #0x02 ; 16-bit
+call loop_write_test
+jmp end_of_test
+
+load_hw_reg_to_regs:
+  lr $AR0, @FORMAT
+  lr $AR1, @0xffd2 ; unknown
+  lr $AR2, @PRED_SCALE
+  lr $AR3, @YN1
+  lr $IX0, @YN2
+  lr $IX1, @ACIN
+
+  lri $AC0.H, #0
+  lrs $AC0.M, @ACSAH
+  lrs $AC0.L, @ACSAL
+
+  lri $AC1.H, #0
+  lrs $AC1.M, @ACEAH
+  lrs $AC1.L, @ACEAL
+
+  lrs $AX0.H, @ACCAH
+  lrs $AX0.L, @ACCAL
+  lrs $AX1.H, @ACCAH
+  lrs $AX1.L, @ACCAL
+
+  lrs $AX1.H, @ACCAH
+  lrs $AX1.L, @ACCAL
+
+  ret
--- a/Source/DSPSpy/tests/accelerator_test.ds
+++ b/Source/DSPSpy/tests/accelerator_test.ds
@ -12,7 +12,7 @@ include "dsp_base.inc"
 ;   AC1.M/L: end address
 test_accelerator_addrs_ex:
  ; Set the sample format
-  sr @0xffd1, $AC0.H
+  sr @FORMAT, $AC0.H

  ; Set the accelerator start and current address.
  srs @ACSAH, $AC0.M
@ -35,8 +35,8 @@ test_accelerator_addrs_ex:
  lrs $AX0.L, @ACCAL

  ; Make the accelerator read memory
-  lrs $AX1.H, @ARAM
-  lrs $AX1.H, @ARAM
+  lrs $AX1.H, @ACDSAMP
+  lrs $AX1.H, @ACDSAMP
  ; AX1 -> new current position after read
  lrs $AX1.H, @ACCAH
  lrs $AX1.L, @ACCAL
--- a/Source/DSPSpy/tests/dsp_base_noirq.inc
+++ b/Source/DSPSpy/tests/dsp_base_noirq.inc
@ -142,10 +142,10 @@ irq5:
 	si		@DMBL, #0x0000
 	si		@DIRQ, #0x0001
 	lri		$ac0.m, #0xbbbb
-	sr		@0xffda, $ac0.m ; pred scale
-	sr		@0xffdb, $ac0.m ; yn1
-	lr		$ix2, @ARAM
-	sr		@0xffdc, $ac0.m ; yn2
+	sr		@PRED_SCALE, $ac0.m
+	sr		@YN1, $ac0.m
+	lr		$ix2, @ACDSAMP
+	sr		@YN2, $ac0.m
 	rti
 irq6:
 	lri		$ac0.m, #0x0006
--- a/Source/DSPSpy/tests/mul_test.ds
+++ b/Source/DSPSpy/tests/mul_test.ds
@ -187,65 +187,3 @@ CLR15

 ; We're done, DO NOT DELETE THIS LINE
 jmp end_of_test
-
-; test accelerator
-
-; TODO: DSPSpy puts a 16-bit ramp at 0x10000000
-LRIS $AC1.M, #0x0a    ; 16-bit PCM audio
-;SRS	@SampleFormat, $AC1.M
-; Start accelerator position
-LRI $AC1.M, #0x0100
-SRS @ACCAH, $AC1.M
-LRI $AC1.M, #0x1000
-SRS @ACCAH, $AC1.M
-; Current accelerator position
-LRI $AC1.M, #0x0100
-SRS @ACCAH, $AC1.M
-LRI $AC1.M, #0x1000
-SRS @ACCAH, $AC1.M
-; End accelerator position
-LRI $AC1.M, #0x0100
-SRS @ACCAH, $AC1.M
-LRI $AC1.M, #0x2000
-SRS @ACCAH, $AC1.M
-
-; Now to the interesting parameter - gain.
-LRI $AC1.M, #0xFFFF
-SRS @GAIN, $AC1.M
-
-; Let's now load a sample through the accelerator.
-LRS $AC1.M, @ARAM
-call send_back
-
-jmp end_of_test
-
-; test addpaxz
-call send_back
-
-clrp
-lri $AX0.L, #0x1111
-lri $AX0.H, #0x2222
-call send_back
-
-clrp
-addpaxz $ACC0, $AX0.H
-
-call send_back
-
-clrp
-set40
-addpaxz $ACC0, $AX0.H
-set16
-
-call send_back
-
-clrp
-set15
-addpaxz $ACC0, $AX0.H
-clr15
-
-call send_back
-
-
-jmp end_of_test
-
--- a/Source/DSPSpy/tests/rti_test.ds
+++ b/Source/DSPSpy/tests/rti_test.ds
@ -14,10 +14,10 @@ include "dsp_base_noirq.inc"
 test_main:
 	; Use the accelerator to generate an IRQ by setting the start and end address to 0
 	; This will result in an interrupt on every read
-	SI @0xffda, #0 ; pred_scale
-	SI @0xffdb, #0 ; yn1
-	SI @0xffdc, #0 ; yn2
-	SI @0xffd1, #0 ; SampleFormat
+	SI @PRED_SCALE, #0
+	SI @YN1, #0
+	SI @YN2, #0
+	SI @FORMAT, #0
 	SI @ACSAH, #0
 	SI @ACCAH, #0
 	SI @ACSAL, #0
@ -27,24 +27,24 @@ test_main:


 	LRI $AX1.H, #0x0000
-	LRS $AX0.L, @ARAM ; Trigger interrupt
+	LRS $AX0.L, @ACDSAMP ; Trigger interrupt
 	CALL send_back

 	LRI $AX1.H, #0x0001
-	LRS $AX0.L, @ARAM ; Trigger interrupt
+	LRS $AX0.L, @ACDSAMP ; Trigger interrupt
 	CALL send_back

 	LRI $AX1.H, #0x0000
-	LRS $AX0.L, @ARAM ; Trigger interrupt
+	LRS $AX0.L, @ACDSAMP ; Trigger interrupt
 	CALL send_back

 	jmp end_of_test

 accov_irq:
 	; Restore registers, otherwise no new interrupt will be generated
-	SI @0xffda, #0 ; pred_scale
-	SI @0xffdb, #0 ; yn1
-	SI @0xffdc, #0 ; yn2
+	SI @PRED_SCALE, #0
+	SI @YN1, #0
+	SI @YN2, #0

 	TSTAXH $AX1.H
 	LRI $AX1.L, #0x1111
--- a/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp
+++ b/Source/UnitTests/Core/DSP/DSPAcceleratorTest.cpp
@ -17,13 +17,15 @@ public:
  {
    std::array<s16, 16> coefs{};
    m_accov_raised = false;
-    return Read(coefs.data());
+    return ReadSample(coefs.data());
  }

  bool EndExceptionRaised() const { return m_accov_raised; }

 protected:
-  void OnEndException() override
+  void OnRawReadEndException() override {}
+  void OnRawWriteEndException() override {}
+  void OnSampleReadEndException() override
  {
    EXPECT_TRUE(m_reads_stopped);
    m_accov_raised = true;
--- a/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
+++ b/docs/DSP/GameCube_DSP_Users_Manual/GameCube_DSP_Users_Manual.tex
@ -46,7 +46,7 @@
 % Document front page material
 \title{\textbf{\Huge GameCube DSP User's Manual}}
 \author{Reverse-engineered and documented by Duddie \\ \href{mailto:duddie@walla.com}{duddie@walla.com}}
-\date{\today\\v0.1.5}
+\date{\today\\v0.1.7}

 % Title formatting commands
 \newcommand{\OpcodeTitle}[1]{\subsection{#1}\label{instruction:#1}}
@ -263,6 +263,8 @@ The purpose of this documentation is purely academic and it aims at understandin
 0.1.3            & 2022.05.27    & Pokechu22       & Renamed \texttt{CMPAR} instruction to \texttt{CMPAXH}                                    \\ \hline
 0.1.4            & 2022.06.02    & Pokechu22       & Fixed typos; added sections on 16-bit and 40-bit modes and on main and extended opcode writing to the same register. \\ \hline
 0.1.5            & 2022.09.29    & vpelletier      & Fixed \texttt{BLOOP} and \texttt{BLOOPI} suboperation order \\ \hline
+0.1.6            & 2022.06.20    & xperia64        & Accelerator documentation updates, fix register typo in ANDC and ORC descriptions        \\ \hline
+0.1.7            & 2025.04.21    & Tilka           & Fixed typos and complained about GFDL                                                    \\ \hline
 \end{tabular}
 \end{table}

@ -659,9 +661,9 @@ Exception vectors are located at address \Address{0x0000} in Instruction RAM.
 0              & \Address{0x0000} & \texttt{RESET}  &                                           \\ \hline
 1              & \Address{0x0002} & \texttt{STOVF}  & Stack under/overflow                      \\ \hline
 2              & \Address{0x0004} &                 &                                           \\ \hline
-3              & \Address{0x0006} &                &                               \\ \hline
-4              & \Address{0x0008} &                &                               \\ \hline
-5              & \Address{0x000A} & \texttt{ACCOV} & Accelerator address overflow  \\ \hline
+3              & \Address{0x0006} & \texttt{ACRROV} & Accelerator raw read address overflow     \\ \hline
+4              & \Address{0x0008} & \texttt{ACRWOV} & Accelerator raw write address overflow    \\ \hline
+5              & \Address{0x000A} & \texttt{ACSOV}  & Accelerator sample read address overflow  \\ \hline
 6              & \Address{0x000C} &                 &                                           \\ \hline
 7              & \Address{0x000E} & \texttt{INT}    & External interrupt (from CPU)             \\ \hline
 \end{tabular}
@ -695,19 +697,19 @@ Hardware registers (IFX) occupy the address space at \Address{0xFFxx} in the Dat
 \multicolumn{3}{|l|}{\textit{Accelerator}}                            \\ \hline
 \Address{0xFFD1} & \Register{FORMAT}  & Accelerator sample format     \\ \hline
 \Address{0xFFD2} & \Register{ACUNK1}  & Unknown, usually 3            \\ \hline
-\Address{0xFFD3} & \Register{ACDATA1} & Alternative ARAM interface    \\ \hline
+\Address{0xFFD3} & \Register{ACDRAW}  & Accelerator raw data          \\ \hline
 \Address{0xFFD4} & \Register{ACSAH}   & Accelerator start address H   \\ \hline
 \Address{0xFFD5} & \Register{ACSAL}   & Accelerator start address L   \\ \hline
 \Address{0xFFD6} & \Register{ACEAH}   & Accelerator end address H     \\ \hline
 \Address{0xFFD7} & \Register{ACEAL}   & Accelerator end address L     \\ \hline
 \Address{0xFFD8} & \Register{ACCAH}   & Accelerator current address H \\ \hline
 \Address{0xFFD9} & \Register{ACCAL}   & Accelerator current address L \\ \hline
-\Address{0xFFDA} & \Register{SCALE}   & ADPCM predictor and scale     \\ \hline
-\Address{0xFFDB} & \Register{YN1}     & ADPCM YN1                     \\ \hline
-\Address{0xFFDC} & \Register{YN2}     & ADPCM YN2                     \\ \hline
-\Address{0xFFDD} & \Register{ACDAT}   & Accelerator data              \\ \hline
+\Address{0xFFDA} & \Register{PRED\_SCALE} & ADPCM predictor and scale \\ \hline
+\Address{0xFFDB} & \Register{YN1}     & ADPCM output history Y[N - 1] \\ \hline
+\Address{0xFFDC} & \Register{YN2}     & ADPCM output history Y[N - 2] \\ \hline
+\Address{0xFFDD} & \Register{ACDSAMP} & Accelerator processed sample  \\ \hline
 \Address{0xFFDE} & \Register{GAIN}    & Gain                          \\ \hline
-\Address{0xFFDF} & \Register{ACUNK2}  & Unknown, usually \Value{0x0C} \\ \hline
+\Address{0xFFDF} & \Register{ACIN}    & Accelerator input             \\ \hline
 \Address{0xFFED} & \Register{AMDM}    & ARAM DMA Request Mask         \\ \hline
 \multicolumn{3}{|l|}{\textit{Interrupts}}                             \\ \hline
 \Address{0xFFFB} & \Register{DIRQ}    & IRQ request                   \\ \hline
@ -762,20 +764,29 @@ The GameCube DSP is connected to the memory bus through a DMA channel. DMA can b
 \section{Accelerator}

 The accelerator is used to transfer data from accelerator memory (ARAM) to DSP memory. The accelerator area can be marked with \Register{ACSA} (start) and \Register{ACEA} (end) addresses.
-Current address for the accelerator can be set or read from the \Register{ACCA} register. Reading from accelerator memory is done by reading from the \Register{ACDAT} register.
-This register contains data from ARAM pointed to by the \Register{ACCA} register.
+Current address for the accelerator can be set or read from the \Register{ACCA} register. Accessing accelerator memory is done by reading or writing the \Register{ACDRAW} register for raw data, or reading the \Register{ACDSAMP} register for processed sample data.
+These registers contain raw or processed sample data from ARAM pointed to by the \Register{ACCA} register.
 After reading the data, \Register{ACCA} is incremented by one.
-After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA}, it gets reset to a value from \Register{ACSA} and the \Exception{ACCOV} interrupt is generated.
+After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA}, it gets reset to a value from \Register{ACSA} and an exception is generated. Raw reads generate exception \Exception{ACRROV}, raw writes generate exception \Exception{ACRWOV}, and sample reads generate exception \Exception{ACSOV}.

-\RegisterBitOverview{0xFFD1}{FORMAT}{Accelerator sample format}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFD1}{FORMAT}{Accelerator sample format}{---- ---- --gg ddss}

 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{\begin{tabular}[c]{@{}l@{}}
-\Value{0x00} - ADPCM audio \\
-\Value{0x05} - u8 reads (D3) \\
-\Value{0x06} - u16 reads (D3) \\
-\Value{0x0A} - 16-bit PCM audio, u16 writes (D3) \\
-\Value{0x19} - 8-bit PCM audio
+\RegisterBitDescription{5--4}{g}{R/W}{\begin{tabular}[c]{@{}l@{}}
+\Value{0} - PCM gain/coef scaling = 1/2048 \\
+\Value{1} - PCM gain/coef scaling = 1/1 \\
+\Value{2} - PCM gain/coef scaling = 1/65536 \\
+\end{tabular}}
+\RegisterBitDescription{3--2}{d}{R/W}{\begin{tabular}[c]{@{}l@{}}
+\Value{0} - ADPCM decoding from ARAM \\
+\Value{1} - PCM decoding from \Register{ACIN}, \Register{ACCA} doesn't increment \\
+\Value{2} - PCM decoding from ARAM \\
+\Value{3} - PCM decoding from \Register{ACIN}, \Register{ACCA} increments \\
+\end{tabular}}
+\RegisterBitDescription{1--0}{s}{R/W}{\begin{tabular}[c]{@{}l@{}}
+\Value{0} - 4-bit \\
+\Value{1} - 8-bit \\
+\Value{2} - 16-bit \\
 \end{tabular}}
 \end{RegisterBitDescriptions}

@ -785,10 +796,10 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \RegisterBitDescription{15--0}{d}{R/W}{Usually 3}
 \end{RegisterBitDescriptions}

-\RegisterBitOverview{0xFFD3}{ACDATA1}{Alternative ARAM interface}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFD3}{ACDRAW}{Raw ARAM Access}{dddd dddd dddd dddd}

 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Reads from or writes to data pointed to by current accelerator address, and then increments the current address.  It is unclear whether this respects the start and end addresses.}
+\RegisterBitDescription{15--0}{d}{R/W}{Reads from or writes to raw data pointed to by current accelerator address, and then increments the current address.  Reads respect the FORMAT size. Writes are always 16-bit and treat the addresses as such.  Writes require that the uppermost bit of the current address is set.  Reads that overflow the end address throw exception \Exception{ACRROV}.  Writes that overflow throw exception \Exception{ACRWOV}.}
 \end{RegisterBitDescriptions}

 \RegisterBitOverview{0xFFD4}{ACSAH}{Accelerator Start Address H}{dddd dddd dddd dddd}
@ -829,7 +840,7 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \RegisterBitDescription{15--0}{d}{R/W}{Bits 15--0 of the accelerator current address}
 \end{RegisterBitDescriptions}

-\RegisterBitOverview{0xFFDA}{SCALE}{ADPCM predictor and scale}{---- ---- -ppp ssss}
+\RegisterBitOverview{0xFFDA}{PRED\_SCALE}{ADPCM predictor and scale}{---- ---- -ppp ssss}

 \begin{RegisterBitDescriptions}
 \RegisterBitDescription{6--4}{d}{R/W}{Used to decide which pair of coefficients to use (\Register{COEF\_A1\_p} and \Register{COEF\_A2\_p}, at $\Address{0xFFA0} + 2p$ and $\Address{0xFFA0} + 2p + 1$)}
@ -841,31 +852,31 @@ After \Register{ACCA} grows bigger than the area pointed to by \Register{ACEA},
 \RegisterBitOverview{0xFFDB}{YN1}{ADPCM YN1}{dddd dddd dddd dddd}

 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Last value read by the accelerator, updated to the new value of \Register{ACDAT} when \Register{ACDAT} is read.  Used when calculating ADPCM, but updated for all sample formats.}
+\RegisterBitDescription{15--0}{d}{R/W}{Last value read by the accelerator, updated to the new value of \Register{ACDSAMP} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A1 coefficient selected by PRED\_SCALE and scaled per FORMAT.}
 \end{RegisterBitDescriptions}

-\RegisterBitOverview{0xFFDC}{YN1}{ADPCM YN2}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFDC}{YN2}{ADPCM YN2}{dddd dddd dddd dddd}

 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Second-last value read by the accelerator, updated to the previous value of \Register{YN1} when \Register{ACDAT} is read.  Used when calculating ADPCM, but updated for all sample formats.  Writing this value starts the accelerator.}
+\RegisterBitDescription{15--0}{d}{R/W}{Second-last value read by the accelerator, updated to the previous value of \Register{YN1} when \Register{ACDSAMP} is read.  Used and updated for all sample formats.  Multiplied by the A2 coefficient selected by PRED\_SCALE and scaled per FORMAT.  Writing this value starts the accelerator.}
 \end{RegisterBitDescriptions}

-\RegisterBitOverview{0xFFDD}{ACDAT}{Accelerator data}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFDD}{ACDSAMP}{Accelerator data}{dddd dddd dddd dddd}

 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R}{Reads new data from the accelerator.  When there is no data left, returns 0.}
+\RegisterBitDescription{15--0}{d}{R}{Reads new proccessed sample data from the accelerator.  Data is processed per FORMAT.  When there is no data left, returns 0.}
 \end{RegisterBitDescriptions}

 \RegisterBitOverview{0xFFDE}{GAIN}{Gain}{dddd dddd dddd dddd}

 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Exact behavior unknown}
+\RegisterBitDescription{15--0}{d}{R/W}{Applied in PCM FORMATs.  Raw sample is multiplied by GAIN, then scaled per the gain scale bits of FORMAT.}
 \end{RegisterBitDescriptions}

-\RegisterBitOverview{0xFFDF}{ACUNK2}{Unknown 2}{dddd dddd dddd dddd}
+\RegisterBitOverview{0xFFDF}{ACIN}{Accelerator Input}{dddd dddd dddd dddd}

 \begin{RegisterBitDescriptions}
-\RegisterBitDescription{15--0}{d}{R/W}{Usually \Value{0x0C}}
+\RegisterBitDescription{15--0}{d}{R/W}{Used as the sample input in place of ARAM reads when FORMAT specifies it.}
 \end{RegisterBitDescriptions}

 \RegisterBitOverview{0xFFEF}{AMDM}{ARAM DMA Request Mask}{---- ---- ---- ---m}
@ -1358,7 +1369,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
  \end{DSPOpcodeFormat}

  \begin{DSPOpcodeDescription}
-    \item Logic AND middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ax(1-D)}.m.
+    \item Logic AND middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ac(1-D)}.m.
  \end{DSPOpcodeDescription}

  \begin{DSPOpcodeOperation}
@ -3640,7 +3651,7 @@ A ``-'' indicates that the flag retains its previous value, a ``0'' indicates th
  \end{DSPOpcodeFormat}

  \begin{DSPOpcodeDescription}
-    \item Logic OR middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ax(1-D).m}.
+    \item Logic OR middle part of accumulator \Register{\$acD.m} with middle part of accumulator \Register{\$ac(1-D).m}.
  \end{DSPOpcodeDescription}

  \begin{DSPOpcodeOperation}