diff --git a/Data/Sys/GC/dsp_rom.bin b/Data/Sys/GC/dsp_rom.bin index 34a454df85..a4895bd176 100644 Binary files a/Data/Sys/GC/dsp_rom.bin and b/Data/Sys/GC/dsp_rom.bin differ diff --git a/Source/Core/Core/DSP/DSPCore.cpp b/Source/Core/Core/DSP/DSPCore.cpp index 48cc0256c8..609e4e3d77 100644 --- a/Source/Core/Core/DSP/DSPCore.cpp +++ b/Source/Core/Core/DSP/DSPCore.cpp @@ -33,25 +33,28 @@ static bool VerifyRoms(const SDSP& dsp) u32 hash_drom; // dsp_coef.bin }; - static const std::array known_roms = {{ + static const std::array known_roms = {{ // Official Nintendo ROM {0x66f334fe, 0xf3b93527}, - // LM1234 replacement ROM (Zelda UCode only) + // v0.1: LM1234 replacement ROM (Zelda UCode only) {0x9c8f593c, 0x10000001}, - // delroth's improvement on LM1234 replacement ROM (Zelda and AX only, + // v0.2: delroth's improvement on LM1234 replacement ROM (Zelda and AX only, // IPL/Card/GBA still broken) {0xd9907f71, 0xb019c2fb}, - // above with improved resampling coefficients + // v0.2.1: above with improved resampling coefficients {0xd9907f71, 0xdb6880c1}, - // above with support for GBA ucode + // v0.3: above with support for GBA ucode {0x3aa4a793, 0xa4a575f5}, - // above with fix to skip bootucode_ax when running from ROM entrypoint + // v0.3.1: above with fix to skip bootucode_ax when running from ROM entrypoint {0x128ea7a2, 0xa4a575f5}, + + // v0.4: above with fixes for invalid use of SRS instruction + {0xe789b5a5, 0xa4a575f5}, }}; const u32 hash_irom = @@ -69,28 +72,30 @@ static bool VerifyRoms(const SDSP& dsp) if (rom_idx < 0) { - if (AskYesNoFmtT("Your DSP ROMs have incorrect hashes.\n" + if (AskYesNoFmtT("Your DSP ROMs have incorrect hashes.\n\n" + "Delete the dsp_rom.bin and dsp_coef.bin files in the GC folder in the Global " + "User Directory to use the free DSP ROM, or replace them with good dumps from " + "a real GameCube/Wii.\n\n" "Would you like to stop now to fix the problem?\n" "If you select \"No\", audio might be garbled.")) + { return false; + } } - if 
(rom_idx == 1) + if (rom_idx >= 1 && rom_idx <= 5) { - Host::OSD_AddMessage("You are using an old free DSP ROM made by the Dolphin Team.", 6000); - Host::OSD_AddMessage("Only games using the Zelda UCode will work correctly.", 6000); - } - else if (rom_idx == 2 || rom_idx == 3) - { - Host::OSD_AddMessage("You are using a free DSP ROM made by the Dolphin Team.", 8000); - Host::OSD_AddMessage("All Wii games will work correctly, and most GameCube games", 8000); - Host::OSD_AddMessage("should also work fine, but the GBA/CARD UCodes will not work.", 8000); - } - else if (rom_idx == 4) - { - Host::OSD_AddMessage("You are using a free DSP ROM made by the Dolphin Team.", 8000); - Host::OSD_AddMessage("All Wii games will work correctly, and most GameCube games", 8000); - Host::OSD_AddMessage("should also work fine, but the CARD UCode will not work.", 8000); + if (AskYesNoFmtT( + "You are using an old free DSP ROM made by the Dolphin Team.\n" + "Due to emulation accuracy improvements, this ROM no longer works correctly.\n\n" + "Delete the dsp_rom.bin and dsp_coef.bin files in the GC folder in the Global " + "User Directory to use the most recent free DSP ROM, or replace them with " + "good dumps from a real GameCube/Wii.\n\n" + "Would you like to stop now to fix the problem?\n" + "If you select \"No\", audio might be garbled.")) + { + return false; + } } return true; diff --git a/Source/Core/Core/DSP/DSPTables.cpp b/Source/Core/Core/DSP/DSPTables.cpp index 133fe570ed..7f1ab00a6b 100644 --- a/Source/Core/Core/DSP/DSPTables.cpp +++ b/Source/Core/Core/DSP/DSPTables.cpp @@ -18,7 +18,7 @@ namespace DSP { // clang-format off -const std::array s_opcodes = +const std::array s_opcodes = {{ // # of parameters----+ {type, size, loc, lshift, mask} branch reads PC // instruction approximation // name opcode mask size-V V param 1 param 2 param 3 extendable uncond. 
updates SR @@ -48,7 +48,22 @@ const std::array s_opcodes = {"RETO", 0x02de, 0xffff, 1, 0, {}, false, true, false, true, false}, // return if overflow {"RET", 0x02df, 0xffff, 1, 0, {}, false, true, true, false, false}, // unconditional return - {"RTI", 0x02ff, 0xffff, 1, 0, {}, false, true, true, false, false}, // return from interrupt + {"RTIGE", 0x02f0, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if greater or equal + {"RTIL", 0x02f1, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if less + {"RTIG", 0x02f2, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if greater + {"RTILE", 0x02f3, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if less or equal + {"RTINZ", 0x02f4, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if not zero + {"RTIZ", 0x02f5, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if zero + {"RTINC", 0x02f6, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if not carry + {"RTIC", 0x02f7, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if carry + {"RTIx8", 0x02f8, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if TODO + {"RTIx9", 0x02f9, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if TODO + {"RTIxA", 0x02fa, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if TODO + {"RTIxB", 0x02fb, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if TODO + {"RTILNZ", 0x02fc, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if logic not zero + {"RTILZ", 0x02fd, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if logic zero + {"RTIO", 0x02fe, 0xffff, 1, 0, {}, false, true, false, true, false}, // return from interrupt if overflow + {"RTI", 0x02ff, 0xffff, 1, 0, {}, 
false, true, true, false, false}, // return from interrupt unconditionally {"CALLGE", 0x02b0, 0xffff, 2, 1, {{P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false}, // call if greater or equal {"CALLL", 0x02b1, 0xffff, 2, 1, {{P_ADDR_I, 2, 1, 0, 0xffff}}, false, true, false, true, false}, // call if less @@ -192,7 +207,8 @@ const std::array s_opcodes = //2 {"LRS", 0x2000, 0xf800, 1, 2, {{P_REG18, 1, 0, 8, 0x0700}, {P_MEM, 1, 0, 0, 0x00ff}}, false, false, false, false, false}, // $(D+24) = MEM[($cr[0-7] << 8) | I] - {"SRS", 0x2800, 0xf800, 1, 2, {{P_MEM, 1, 0, 0, 0x00ff}, {P_REG18, 1, 0, 8, 0x0700}}, false, false, false, false, false}, // MEM[($cr[0-7] << 8) | I] = $(S+24) + {"SRSH", 0x2800, 0xfe00, 1, 2, {{P_MEM, 1, 0, 0, 0x00ff}, {P_ACCH, 1, 0, 8, 0x0100}}, false, false, false, false, false}, // MEM[($cr[0-7] << 8) | I] = $acS.h + {"SRS", 0x2c00, 0xfc00, 1, 2, {{P_MEM, 1, 0, 0, 0x00ff}, {P_REG1C, 1, 0, 8, 0x0300}}, false, false, false, false, false}, // MEM[($cr[0-7] << 8) | I] = $(S+28) // opcodes that can be extended diff --git a/Source/Core/Core/DSP/DSPTables.h b/Source/Core/Core/DSP/DSPTables.h index 89c224a339..2dead9094e 100644 --- a/Source/Core/Core/DSP/DSPTables.h +++ b/Source/Core/Core/DSP/DSPTables.h @@ -44,16 +44,13 @@ enum partype_t P_ACCM = P_REG | 0x1e00, // used for mid part of accum // The following are not in gcdsptool P_ACCM_D = P_REG | 0x1e80, - P_ACC = P_REG | 0x2000, // used for full accum. + P_ACC = P_REG | 0x2000, // used for full accum.
+ P_ACCH = P_REG | 0x1000, // used for high part of accum P_ACC_D = P_REG | 0x2080, P_AX = P_REG | 0x2200, P_REGS_MASK = 0x03f80, // gcdsptool's value = 0x01f80 P_REF = P_REG | 0x4000, P_PRG = P_REF | P_REG, - - // The following seem like junk: - // P_REG10 = P_REG | 0x1000, - // P_AX_D = P_REG | 0x2280, }; struct param2_t diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntBranch.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntBranch.cpp index 7dafeaf3d8..5f67d953c9 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntBranch.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntBranch.cpp @@ -104,13 +104,17 @@ void Interpreter::ret(const UDSPInstruction opc) state.pc = state.PopStack(StackRegister::Call); } -// RTI +// RTIcc // 0000 0010 1111 1111 // Return from exception. Pops stored status register $sr from data stack // $st1 and program counter PC from call stack $st0 and sets $pc to this // location. -void Interpreter::rti(const UDSPInstruction) +// This instruction has a conditional form, but it is not used by any official ucode. +void Interpreter::rti(const UDSPInstruction opc) { + if (!CheckCondition(opc & 0xf)) + return; + auto& state = m_dsp_core.DSPState(); state.r.sr = state.PopStack(StackRegister::Data); state.pc = state.PopStack(StackRegister::Call); diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntLoadStore.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntLoadStore.cpp index 744dffb70b..df6fb4c322 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntLoadStore.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntLoadStore.cpp @@ -8,15 +8,29 @@ namespace DSP::Interpreter { -// SRS @M, $(0x18+S) -// 0010 1sss mmmm mmmm -// Move value from register $(0x18+S) to data memory pointed by address +// SRSH @M, $acS.h +// 0010 100s mmmm mmmm +// Move value from register $acS.h to data memory pointed by address +// CR[0-7] | M. That is, the upper 8 bits of the address are the +// bottom 8 bits from CR, and the lower 8 bits are from the 8-bit immediate.
+void Interpreter::srsh(const UDSPInstruction opc) +{ + auto& state = m_dsp_core.DSPState(); + const auto reg = static_cast(((opc >> 8) & 0x1) + DSP_REG_ACH0); + const auto addr = static_cast((state.r.cr << 8) | (opc & 0xFF)); + + state.WriteDMEM(addr, OpReadRegister(reg)); +} + +// SRS @M, $(0x1C+S) +// 0010 11ss mmmm mmmm +// Move value from register $(0x1C+S) to data memory pointed by address // CR[0-7] | M. That is, the upper 8 bits of the address are the // bottom 8 bits from CR, and the lower 8 bits are from the 8-bit immediate. void Interpreter::srs(const UDSPInstruction opc) { auto& state = m_dsp_core.DSPState(); - const auto reg = static_cast(((opc >> 8) & 0x7) + 0x18); + const auto reg = static_cast(((opc >> 8) & 0x3) + DSP_REG_ACL0); const auto addr = static_cast((state.r.cr << 8) | (opc & 0xFF)); if (reg >= DSP_REG_ACM0) diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp index eaca44d5a4..eefe8d2fd7 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntTables.cpp @@ -19,7 +19,7 @@ struct InterpreterOpInfo }; // clang-format off -constexpr std::array s_opcodes +constexpr std::array s_opcodes {{ {0x0000, 0xfffc, &Interpreter::nop}, @@ -32,7 +32,7 @@ constexpr std::array s_opcodes {0x02d0, 0xfff0, &Interpreter::ret}, - {0x02ff, 0xffff, &Interpreter::rti}, + {0x02f0, 0xfff0, &Interpreter::rti}, {0x02b0, 0xfff0, &Interpreter::call}, @@ -101,7 +101,8 @@ constexpr std::array s_opcodes // 2 {0x2000, 0xf800, &Interpreter::lrs}, - {0x2800, 0xf800, &Interpreter::srs}, + {0x2800, 0xfe00, &Interpreter::srsh}, + {0x2c00, 0xfc00, &Interpreter::srs}, // opcodes that can be extended diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h index b7b82100a8..119c509f2a 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h @@ -149,6 
+149,7 @@ public: void srri(UDSPInstruction opc); void srrn(UDSPInstruction opc); void srs(UDSPInstruction opc); + void srsh(UDSPInstruction opc); void sub(UDSPInstruction opc); void subarn(UDSPInstruction opc); void subax(UDSPInstruction opc); diff --git a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h index 145fa149db..ee74def463 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h +++ b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h @@ -88,6 +88,7 @@ public: void bloopi(UDSPInstruction opc); // Load/Store + void srsh(UDSPInstruction opc); void srs(UDSPInstruction opc); void lrs(UDSPInstruction opc); void lr(UDSPInstruction opc); @@ -220,6 +221,7 @@ private: void r_callr(UDSPInstruction opc); void r_ifcc(UDSPInstruction opc); void r_ret(UDSPInstruction opc); + void r_rti(UDSPInstruction opc); void Update_SR_Register(Gen::X64Reg val = Gen::EAX, Gen::X64Reg scratch = Gen::EDX); diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp index ea4df2a6fd..043e5f2043 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp @@ -255,12 +255,7 @@ void DSPEmitter::ret(const UDSPInstruction opc) ReJitConditional(opc, &DSPEmitter::r_ret); } -// RTI -// 0000 0010 1111 1111 -// Return from exception. Pops stored status register $sr from data stack -// $st1 and program counter PC from call stack $st0 and sets $pc to this -// location. -void DSPEmitter::rti(const UDSPInstruction opc) +void DSPEmitter::r_rti(const UDSPInstruction opc) { // g_dsp.r[DSP_REG_SR] = dsp_reg_load_stack(StackRegister::Data); dsp_reg_load_stack(StackRegister::Data); @@ -268,6 +263,20 @@ void DSPEmitter::rti(const UDSPInstruction opc) // g_dsp.pc = dsp_reg_load_stack(StackRegister::Call); dsp_reg_load_stack(StackRegister::Call); MOV(16, M_SDSP_pc(), R(DX)); + WriteBranchExit(); +} + +// RTIcc +// 0000 0010 1111 cccc +// Return from exception.
Pops stored status register $sr from data stack +// $st1 and program counter PC from call stack $st0 and sets $pc to this +// location. +// This instruction has a conditional form, but it is not used by any official ucode. +// NOTE: Cannot use FallBackToInterpreter(opc) here because of the need to write branch exit +void DSPEmitter::rti(const UDSPInstruction opc) +{ + MOV(16, M_SDSP_pc(), Imm16(m_compile_pc + 1)); + ReJitConditional(opc, &DSPEmitter::r_rti); } // HALT diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitLoadStore.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitLoadStore.cpp index 7c60d6d1d2..282bc16c3b 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitLoadStore.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitLoadStore.cpp @@ -12,14 +12,35 @@ using namespace Gen; namespace DSP::JIT::x64 { -// SRS @M, $(0x18+S) +// SRSH @M, $acS.h // 0010 1sss mmmm mmmm -// Move value from register $(0x18+S) to data memory pointed by address +// Move value from register $acS.h to data memory pointed by address +// CR[0-7] | M. That is, the upper 8 bits of the address are the +// bottom 8 bits from CR, and the lower 8 bits are from the 8-bit immediate. +void DSPEmitter::srsh(const UDSPInstruction opc) +{ + u8 reg = ((opc >> 8) & 0x1) + DSP_REG_ACH0; + // u16 addr = (g_dsp.r.cr << 8) | (opc & 0xFF); + + X64Reg tmp1 = m_gpr.GetFreeXReg(); + + dsp_op_read_reg(reg, tmp1, RegisterExtension::Zero); + dsp_op_read_reg(DSP_REG_CR, RAX, RegisterExtension::Zero); + SHL(16, R(EAX), Imm8(8)); + OR(16, R(EAX), Imm16(opc & 0xFF)); + dmem_write(tmp1); + + m_gpr.PutXReg(tmp1); +} + +// SRS @M, $(0x1C+S) +// 0010 11ss mmmm mmmm +// Move value from register $(0x1C+S) to data memory pointed by address // CR[0-7] | M. That is, the upper 8 bits of the address are the // bottom 8 bits from CR, and the lower 8 bits are from the 8-bit immediate.
void DSPEmitter::srs(const UDSPInstruction opc) { - u8 reg = ((opc >> 8) & 0x7) + 0x18; + u8 reg = ((opc >> 8) & 0x3) + DSP_REG_ACL0; // u16 addr = (g_dsp.r.cr << 8) | (opc & 0xFF); X64Reg tmp1 = m_gpr.GetFreeXReg(); diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp index b47cc7bd8d..87f446f82d 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitTables.cpp @@ -19,7 +19,7 @@ struct JITOpInfo }; // clang-format off -const std::array s_opcodes = +const std::array s_opcodes = {{ {0x0000, 0xfffc, &DSPEmitter::nop}, @@ -32,7 +32,7 @@ const std::array s_opcodes = {0x02d0, 0xfff0, &DSPEmitter::ret}, - {0x02ff, 0xffff, &DSPEmitter::rti}, + {0x02f0, 0xfff0, &DSPEmitter::rti}, {0x02b0, 0xfff0, &DSPEmitter::call}, @@ -101,7 +101,8 @@ const std::array s_opcodes = // 2 {0x2000, 0xf800, &DSPEmitter::lrs}, - {0x2800, 0xf800, &DSPEmitter::srs}, + {0x2800, 0xfe00, &DSPEmitter::srsh}, + {0x2c00, 0xfc00, &DSPEmitter::srs}, // opcodes that can be extended diff --git a/Source/DSPSpy/tests/accelerator_loop_test.ds b/Source/DSPSpy/tests/accelerator_loop_test.ds index 20e72919f5..6348f52efe 100644 --- a/Source/DSPSpy/tests/accelerator_loop_test.ds +++ b/Source/DSPSpy/tests/accelerator_loop_test.ds @@ -1,6 +1,8 @@ incdir "tests" include "dsp_base.inc" +test_main: + ; Test parameters lri $AC0.M, #0x0000 ; start lri $AC0.L, #0x0000 ; start diff --git a/Source/DSPSpy/tests/accelerator_test.ds b/Source/DSPSpy/tests/accelerator_test.ds index ac9cb99b83..a05866d5bb 100644 --- a/Source/DSPSpy/tests/accelerator_test.ds +++ b/Source/DSPSpy/tests/accelerator_test.ds @@ -2,7 +2,6 @@ ; See https://github.com/dolphin-emu/dolphin/pull/5997 incdir "tests" include "dsp_base.inc" -jmp test_main ; Writes the passed format, start and end addresses to the accelerator registers, ; then reads them back to registers. 
diff --git a/Source/DSPSpy/tests/andc_ls_test.ds b/Source/DSPSpy/tests/andc_ls_test.ds index 7a0c4fccc5..9967f7cfcf 100644 --- a/Source/DSPSpy/tests/andc_ls_test.ds +++ b/Source/DSPSpy/tests/andc_ls_test.ds @@ -5,7 +5,7 @@ include "dsp_base.inc" ; ; 3d80 andc'ls $AC1.M : $AX0.L, $AC0.M - +test_main: clr $ACC0 clr $ACC1 lri $AX0.L, #0x0000 @@ -14,7 +14,7 @@ include "dsp_base.inc" lri $AC0.M, #0x0004 lri $AC1.M, #0x1234 - andc'ls $AC1.M : $AX0.L, $AC0.M + cw 0x3d80 ; andc'ls $AC1.M : $AX0.L, $AC0.M call send_back diff --git a/Source/DSPSpy/tests/arith_test.ds b/Source/DSPSpy/tests/arith_test.ds index 704437b45b..96884c6bbc 100644 --- a/Source/DSPSpy/tests/arith_test.ds +++ b/Source/DSPSpy/tests/arith_test.ds @@ -2,6 +2,8 @@ incdir "tests" include "dsp_base.inc" +test_main: + clr $acc0 tst $acc0 diff --git a/Source/DSPSpy/tests/cr_test.ds b/Source/DSPSpy/tests/cr_test.ds index 272e4b8c2a..4ffc53d0ec 100644 --- a/Source/DSPSpy/tests/cr_test.ds +++ b/Source/DSPSpy/tests/cr_test.ds @@ -12,6 +12,7 @@ TEST_ADDR: equ 0xFFA0 ; 0x0000 TEST_MEM: equ 0x00A0 ; 0x0000 TEST_MEM_2: equ 0x01A0 ; 0x0100 +test_main: LRI $AC0.L, #0xf00f SR @TEST_REG, $AC0.L SR @TEST_MEM, $AC0.L diff --git a/Source/DSPSpy/tests/dr_test.ds b/Source/DSPSpy/tests/dr_test.ds index 1085c74cdc..96109a499b 100644 --- a/Source/DSPSpy/tests/dr_test.ds +++ b/Source/DSPSpy/tests/dr_test.ds @@ -27,6 +27,7 @@ include "dsp_base.inc" ; And thus it's tricky to implement in software too :p ; test using indexing register 1 - 0 is used in send_back +test_main: lri $AR1, #16 lri $IX1, #32 lri $WR1, #0 diff --git a/Source/DSPSpy/tests/dsp_base.inc b/Source/DSPSpy/tests/dsp_base.inc index 46441fd674..685c634d95 100644 --- a/Source/DSPSpy/tests/dsp_base.inc +++ b/Source/DSPSpy/tests/dsp_base.inc @@ -1,9 +1,3 @@ -; This is the trojan program we send to the DSP from DSPSpy to figure it out. 
-REGS_BASE: equ 0x0f80 -MEM_HI: equ 0x0f7E -MEM_LO: equ 0x0f7F - - ; Interrupt vectors 8 vectors, 2 opcodes each jmp irq0 jmp irq1 @@ -14,276 +8,5 @@ MEM_LO: equ 0x0f7F jmp irq6 jmp irq7 -; Main code (and normal entrypoint) at 0x10 - sbset #0x02 - sbset #0x03 - sbclr #0x04 - sbset #0x05 - sbset #0x06 - - s16 - lri $CR, #0x00ff - - clr $acc1 - clr $acc0 - -; get address of memory dump and copy it to DRAM - call 0x807e - si @DMBH, #0x8888 - si @DMBL, #0xdead - si @DIRQ, #0x0001 - - call 0x8078 - andi $ac0.m, #0x7fff - lrs $ac1.m, @CMBL - - sr @MEM_HI, $ac0.m - sr @MEM_LO, $ac1.m - - lri $ax0.l, #0 - lri $ax1.l, #0 ;(DSP_CR_IMEM | DSP_CR_TO_CPU) - lri $ax0.h, #0x2000 - lr $ac0.l, @MEM_HI - lr $ac0.m, @MEM_LO - call do_dma - - -; get address of registers and DMA them to ram - call 0x807e - si @DMBH, #0x8888 - si @DMBL, #0xbeef - si @DIRQ, #0x0001 - - call 0x8078 - andi $ac0.m, #0x7fff - lrs $ac1.m, @CMBL - - sr @MEM_HI, $ac0.m - sr @MEM_LO, $ac1.m - - lri $ax0.l, #REGS_BASE - lri $ax1.l, #0 ;(DSP_CR_IMEM | DSP_CR_TO_CPU) - lri $ax0.h, #0x80 - lr $ac0.l, @MEM_HI - lr $ac0.m, @MEM_LO - call do_dma - -; Read in all the registers from RAM - lri $ar0, #REGS_BASE+1 - lrri $ar1, @$ar0 - lrri $ar2, @$ar0 - lrri $ar3, @$ar0 - lrri $ix0, @$ar0 - lrri $ix1, @$ar0 - lrri $ix2, @$ar0 - lrri $ix3, @$ar0 - lrri $wr0, @$ar0 - lrri $wr1, @$ar0 - lrri $wr2, @$ar0 - lrri $wr3, @$ar0 - lrri $st0, @$ar0 - lrri $st1, @$ar0 - lrri $st2, @$ar0 - lrri $st3, @$ar0 - lrri $ac0.h, @$ar0 - lrri $ac1.h, @$ar0 - lrri $cr, @$ar0 - lrri $sr, @$ar0 - lrri $prod.l, @$ar0 - lrri $prod.m1, @$ar0 - lrri $prod.h, @$ar0 - lrri $prod.m2, @$ar0 - lrri $ax0.l, @$ar0 - lrri $ax1.l, @$ar0 - lrri $ax0.h, @$ar0 - lrri $ax1.h, @$ar0 - lrri $ac0.l, @$ar0 - lrri $ac1.l, @$ar0 - lrri $ac0.m, @$ar0 - lrri $ac1.m, @$ar0 - lr $ar0, @REGS_BASE - - jmp start_of_test - -; This is where we jump when we're done testing, see above. -; We just fall into a loop, playing dead until someone resets the DSP. 
-end_of_test: - nop - jmp end_of_test - -; Utility function to do DMA. -do_dma: - sr @DSMAH, $ac0.l - sr @DSMAL, $ac0.m - sr @DSPA, $ax0.l - sr @DSCR, $ax1.l - sr @DSBL, $ax0.h ; This kicks off the DMA. - call 0x863d ; Wait for DMA to complete by watching a bit in DSCR. - ret - -; IRQ handlers. Just send back exception# and die -irq0: - lri $ac0.m, #0x0000 - jmp irq -irq1: - lri $ac0.m, #0x0001 - jmp irq -irq2: - lri $ac0.m, #0x0002 - jmp irq -irq3: - lri $ac0.m, #0x0003 - jmp irq -irq4: - lri $ac0.m, #0x0004 - jmp irq -irq5: - lrs $ac0.m, @DMBH - andcf $ac0.m, #0x8000 - jlz irq5 - si @DMBH, #0x8005 - si @DMBL, #0x0000 - si @DIRQ, #0x0001 - lri $ac0.m, #0xbbbb - sr @0xffda, $ac0.m ; pred scale - sr @0xffdb, $ac0.m ; yn1 - lr $ix2, @ARAM - sr @0xffdc, $ac0.m ; yn2 - rti -irq6: - lri $ac0.m, #0x0006 - jmp irq -irq7: - lri $ac0.m, #0x0007 - -irq: - lrs $ac1.m, @DMBH - andcf $ac1.m, #0x8000 - jlz irq - si @DMBH, #0x8bad - ;sr @DMBL, $wr3 ; ??? - sr @DMBL, $ac0.m ; Exception number - si @DIRQ, #0x0001 - halt ; Through some magic this allows us to properly ack the exception in dspspy - ;rti ; allow dumping of ucodes which cause exceptions...probably not safe at all - -; DMA:s the current state of the registers back to the PowerPC. To do this, -; it must write the contents of all regs to DRAM. 
-send_back: - ; first, store $sr so we can modify it - sr @(REGS_BASE + 19), $sr - set16 - ; Now store $wr0, as it must be 0xffff for srri to work as we expect - sr @(REGS_BASE + 8), $wr0 - lri $wr0, #0xffff - ; store registers to reg table - sr @REGS_BASE, $ar0 - lri $ar0, #(REGS_BASE + 1) - srri @$ar0, $ar1 - srri @$ar0, $ar2 - srri @$ar0, $ar3 - srri @$ar0, $ix0 - srri @$ar0, $ix1 - srri @$ar0, $ix2 - srri @$ar0, $ix3 - ; skip $wr0 since we already stored and modified it - iar $ar0 - srri @$ar0, $wr1 - srri @$ar0, $wr2 - srri @$ar0, $wr3 - srri @$ar0, $st0 - srri @$ar0, $st1 - srri @$ar0, $st2 - srri @$ar0, $st3 - srri @$ar0, $ac0.h - srri @$ar0, $ac1.h - srri @$ar0, $cr - ; skip $sr since we already stored and modified it - iar $ar0 - srri @$ar0, $prod.l - srri @$ar0, $prod.m1 - srri @$ar0, $prod.h - srri @$ar0, $prod.m2 - srri @$ar0, $ax0.l - srri @$ar0, $ax1.l - srri @$ar0, $ax0.h - srri @$ar0, $ax1.h - srri @$ar0, $ac0.l - srri @$ar0, $ac1.l - srri @$ar0, $ac0.m - srri @$ar0, $ac1.m - -; Regs are stored. Prepare DMA. -; $cr must be 0x00ff because the ROM uses lrs and srs with the assumption that -; they will modify hardware registers. - lri $cr, #0x00ff - lri $ax0.l, #0x0000 - lri $ax1.l, #1 ;(DSP_CR_IMEM | DSP_CR_TO_CPU) - lri $ax0.h, #0x200 - lr $ac0.l, @MEM_HI - lr $ac0.m, @MEM_LO - -; Now, why are we looping here? - lri $ar1, #8+8 - bloop $ar1, dma_copy - call do_dma - addi $ac0.m, #0x200 - mrr $ac1.m, $ax0.l - addi $ac1.m, #0x100 -dma_copy: - mrr $ax0.l, $ac1.m - -; Wait for the CPU to send us a mail. - call 0x807e - si @DMBH, #0x8888 - si @DMBL, #0xfeeb - si @DIRQ, #0x0001 - -; wait for the CPU to recieve our response before we execute the next op - call 0x8078 - andi $ac0.m, #0x7fff - lrs $ac1.m, @CMBL - -; Restore all regs again so we're ready to execute another op. 
- lri $ar0, #REGS_BASE+1 - lrri $ar1, @$ar0 - lrri $ar2, @$ar0 - lrri $ar3, @$ar0 - lrri $ix0, @$ar0 - lrri $ix1, @$ar0 - lrri $ix2, @$ar0 - lrri $ix3, @$ar0 - ; leave $wr for later - iar $ar0 - lrri $wr1, @$ar0 - lrri $wr2, @$ar0 - lrri $wr3, @$ar0 - lrri $st0, @$ar0 - lrri $st1, @$ar0 - lrri $st2, @$ar0 - lrri $st3, @$ar0 - lrri $ac0.h, @$ar0 - lrri $ac1.h, @$ar0 - lrri $cr, @$ar0 - ; leave $sr for later - iar $ar0 - lrri $prod.l, @$ar0 - lrri $prod.m1, @$ar0 - lrri $prod.h, @$ar0 - lrri $prod.m2, @$ar0 - lrri $ax0.l, @$ar0 - lrri $ax1.l, @$ar0 - lrri $ax0.h, @$ar0 - lrri $ax1.h, @$ar0 - lrri $ac0.l, @$ar0 - lrri $ac1.l, @$ar0 - lrri $ac0.m, @$ar0 - lrri $ac1.m, @$ar0 - lr $ar0, @REGS_BASE - lr $wr0, @(REGS_BASE+8) - lr $sr, @(REGS_BASE+19) - - ret ; from send_back - -; Obviously this must be included directly before your test code -start_of_test: +; The rest is in dsp_base_noirq.inc +include "dsp_base_noirq.inc" diff --git a/Source/DSPSpy/tests/dsp_base_noirq.inc b/Source/DSPSpy/tests/dsp_base_noirq.inc new file mode 100644 index 0000000000..1558c7c5ca --- /dev/null +++ b/Source/DSPSpy/tests/dsp_base_noirq.inc @@ -0,0 +1,283 @@ +; This is the trojan program we send to the DSP from DSPSpy to figure it out. +REGS_BASE: equ 0x0f80 +MEM_HI: equ 0x0f7E +MEM_LO: equ 0x0f7F + +WARNPC 0x10 +ORG 0x10 + +; Main code (and normal entrypoint) at 0x10 +; It is expected that IRQs were listed beforehand +; (e.g. 
by including dsp_base.inc instead of dsp_base_noirq.inc) + sbset #0x02 + sbset #0x03 + sbclr #0x04 + sbset #0x05 + sbset #0x06 + + s16 + lri $CR, #0x00ff + + clr $acc1 + clr $acc0 + +; get address of memory dump and copy it to DRAM + call 0x807e + si @DMBH, #0x8888 + si @DMBL, #0xdead + si @DIRQ, #0x0001 + + call 0x8078 + andi $ac0.m, #0x7fff + lrs $ac1.m, @CMBL + + sr @MEM_HI, $ac0.m + sr @MEM_LO, $ac1.m + + lri $ax0.l, #0 + lri $ax1.l, #0 ;(DSP_CR_IMEM | DSP_CR_TO_CPU) + lri $ax0.h, #0x2000 + lr $ac0.l, @MEM_HI + lr $ac0.m, @MEM_LO + call do_dma + + +; get address of registers and DMA them to ram + call 0x807e + si @DMBH, #0x8888 + si @DMBL, #0xbeef + si @DIRQ, #0x0001 + + call 0x8078 + andi $ac0.m, #0x7fff + lrs $ac1.m, @CMBL + + sr @MEM_HI, $ac0.m + sr @MEM_LO, $ac1.m + + lri $ax0.l, #REGS_BASE + lri $ax1.l, #0 ;(DSP_CR_IMEM | DSP_CR_TO_CPU) + lri $ax0.h, #0x80 + lr $ac0.l, @MEM_HI + lr $ac0.m, @MEM_LO + call do_dma + +; Read in all the registers from RAM + lri $ar0, #REGS_BASE+1 + lrri $ar1, @$ar0 + lrri $ar2, @$ar0 + lrri $ar3, @$ar0 + lrri $ix0, @$ar0 + lrri $ix1, @$ar0 + lrri $ix2, @$ar0 + lrri $ix3, @$ar0 + lrri $wr0, @$ar0 + lrri $wr1, @$ar0 + lrri $wr2, @$ar0 + lrri $wr3, @$ar0 + lrri $st0, @$ar0 + lrri $st1, @$ar0 + lrri $st2, @$ar0 + lrri $st3, @$ar0 + lrri $ac0.h, @$ar0 + lrri $ac1.h, @$ar0 + lrri $cr, @$ar0 + lrri $sr, @$ar0 + lrri $prod.l, @$ar0 + lrri $prod.m1, @$ar0 + lrri $prod.h, @$ar0 + lrri $prod.m2, @$ar0 + lrri $ax0.l, @$ar0 + lrri $ax1.l, @$ar0 + lrri $ax0.h, @$ar0 + lrri $ax1.h, @$ar0 + lrri $ac0.l, @$ar0 + lrri $ac1.l, @$ar0 + lrri $ac0.m, @$ar0 + lrri $ac1.m, @$ar0 + lr $ar0, @REGS_BASE + + jmp test_main + +; This is where we jump when we're done testing, see above. +; We just fall into a loop, playing dead until someone resets the DSP. +end_of_test: + nop + jmp end_of_test + +; Utility function to do DMA. +do_dma: + sr @DSMAH, $ac0.l + sr @DSMAL, $ac0.m + sr @DSPA, $ax0.l + sr @DSCR, $ax1.l + sr @DSBL, $ax0.h ; This kicks off the DMA. 
+wait_dma_finish: + lr $ac1.m, @DSCR + andcf $ac1.m, #0x4 + jlz wait_dma_finish + ret + +; IRQ handlers. Just send back exception# and die +irq0: + lri $ac0.m, #0x0000 + jmp irq +irq1: + lri $ac0.m, #0x0001 + jmp irq +irq2: + lri $ac0.m, #0x0002 + jmp irq +irq3: + lri $ac0.m, #0x0003 + jmp irq +irq4: + lri $ac0.m, #0x0004 + jmp irq +irq5: + lrs $ac0.m, @DMBH + andcf $ac0.m, #0x8000 + jlz irq5 + si @DMBH, #0x8005 + si @DMBL, #0x0000 + si @DIRQ, #0x0001 + lri $ac0.m, #0xbbbb + sr @0xffda, $ac0.m ; pred scale + sr @0xffdb, $ac0.m ; yn1 + lr $ix2, @ARAM + sr @0xffdc, $ac0.m ; yn2 + rti +irq6: + lri $ac0.m, #0x0006 + jmp irq +irq7: + lri $ac0.m, #0x0007 + +irq: + lrs $ac1.m, @DMBH + andcf $ac1.m, #0x8000 + jlz irq + si @DMBH, #0x8bad + ;sr @DMBL, $wr3 ; ??? + sr @DMBL, $ac0.m ; Exception number + si @DIRQ, #0x0001 + halt ; Through some magic this allows us to properly ack the exception in dspspy + ;rti ; allow dumping of ucodes which cause exceptions...probably not safe at all + +; DMA:s the current state of the registers back to the PowerPC. To do this, +; it must write the contents of all regs to DRAM. 
+send_back: + ; first, store $sr so we can modify it + sr @(REGS_BASE + 19), $sr + set16 + ; Now store $wr0, as it must be 0xffff for srri to work as we expect + sr @(REGS_BASE + 8), $wr0 + lri $wr0, #0xffff + ; store registers to reg table + sr @REGS_BASE, $ar0 + lri $ar0, #(REGS_BASE + 1) + srri @$ar0, $ar1 + srri @$ar0, $ar2 + srri @$ar0, $ar3 + srri @$ar0, $ix0 + srri @$ar0, $ix1 + srri @$ar0, $ix2 + srri @$ar0, $ix3 + ; skip $wr0 since we already stored and modified it + iar $ar0 + srri @$ar0, $wr1 + srri @$ar0, $wr2 + srri @$ar0, $wr3 + srri @$ar0, $st0 + srri @$ar0, $st1 + srri @$ar0, $st2 + srri @$ar0, $st3 + srri @$ar0, $ac0.h + srri @$ar0, $ac1.h + srri @$ar0, $cr + ; skip $sr since we already stored and modified it + iar $ar0 + srri @$ar0, $prod.l + srri @$ar0, $prod.m1 + srri @$ar0, $prod.h + srri @$ar0, $prod.m2 + srri @$ar0, $ax0.l + srri @$ar0, $ax1.l + srri @$ar0, $ax0.h + srri @$ar0, $ax1.h + srri @$ar0, $ac0.l + srri @$ar0, $ac1.l + srri @$ar0, $ac0.m + srri @$ar0, $ac1.m + +; Regs are stored. Prepare DMA. +; $cr must be 0x00ff because the ROM uses lrs and srs with the assumption that +; they will modify hardware registers. + lri $cr, #0x00ff + lri $ax0.l, #0x0000 + lri $ax1.l, #1 ;(DSP_CR_IMEM | DSP_CR_TO_CPU) + lri $ax0.h, #0x200 + lr $ac0.l, @MEM_HI + lr $ac0.m, @MEM_LO + +; Now, why are we looping here? + lri $ar1, #8+8 + bloop $ar1, dma_copy + call do_dma + addi $ac0.m, #0x200 + mrr $ac1.m, $ax0.l + addi $ac1.m, #0x100 +dma_copy: + mrr $ax0.l, $ac1.m + +; Wait for the CPU to send us a mail. + call 0x807e + si @DMBH, #0x8888 + si @DMBL, #0xfeeb + si @DIRQ, #0x0001 + +; wait for the CPU to receive our response before we execute the next op + call 0x8078 + andi $ac0.m, #0x7fff + lrs $ac1.m, @CMBL + +; Restore all regs again so we're ready to execute another op.
+ lri $ar0, #REGS_BASE+1 + lrri $ar1, @$ar0 + lrri $ar2, @$ar0 + lrri $ar3, @$ar0 + lrri $ix0, @$ar0 + lrri $ix1, @$ar0 + lrri $ix2, @$ar0 + lrri $ix3, @$ar0 + ; leave $wr for later + iar $ar0 + lrri $wr1, @$ar0 + lrri $wr2, @$ar0 + lrri $wr3, @$ar0 + lrri $st0, @$ar0 + lrri $st1, @$ar0 + lrri $st2, @$ar0 + lrri $st3, @$ar0 + lrri $ac0.h, @$ar0 + lrri $ac1.h, @$ar0 + lrri $cr, @$ar0 + ; leave $sr for later + iar $ar0 + lrri $prod.l, @$ar0 + lrri $prod.m1, @$ar0 + lrri $prod.h, @$ar0 + lrri $prod.m2, @$ar0 + lrri $ax0.l, @$ar0 + lrri $ax1.l, @$ar0 + lrri $ax0.h, @$ar0 + lrri $ax1.h, @$ar0 + lrri $ac0.l, @$ar0 + lrri $ac1.l, @$ar0 + lrri $ac0.m, @$ar0 + lrri $ac1.m, @$ar0 + lr $ar0, @REGS_BASE + lr $wr0, @(REGS_BASE+8) + lr $sr, @(REGS_BASE+19) + + ret ; from send_back diff --git a/Source/DSPSpy/tests/dsp_test.ds b/Source/DSPSpy/tests/dsp_test.ds index 3550dd205c..772573b9c3 100644 --- a/Source/DSPSpy/tests/dsp_test.ds +++ b/Source/DSPSpy/tests/dsp_test.ds @@ -6,13 +6,14 @@ include "dsp_base.inc" ; We can call send_back at any time to send data back to the PowerPC. - lri $AC0.M, #0x1000 - call send_back +test_main: + lri $AC0.M, #0x1000 + call send_back - set40 - lri $AC0.M, #0x1000 - set16 - call send_back + set40 + lri $AC0.M, #0x1000 + set16 + call send_back ; We're done, DO NOT DELETE THIS LINE - jmp end_of_test + jmp end_of_test diff --git a/Source/DSPSpy/tests/ir_test.ds b/Source/DSPSpy/tests/ir_test.ds index b9a80b3c96..e47aa729bf 100644 --- a/Source/DSPSpy/tests/ir_test.ds +++ b/Source/DSPSpy/tests/ir_test.ds @@ -30,7 +30,7 @@ include "dsp_base.inc" ; I really don't know how the above could possibly be efficiently implemented in hardware. 
; And thus it's tricky to implement in software too :p - +test_main: ; test using indexing register 1 - 0 is used in send_back lri $AR1, #16 lri $IX1, #32 diff --git a/Source/DSPSpy/tests/ld_test.ds b/Source/DSPSpy/tests/ld_test.ds index 0154677d6a..045680c877 100644 --- a/Source/DSPSpy/tests/ld_test.ds +++ b/Source/DSPSpy/tests/ld_test.ds @@ -2,6 +2,7 @@ incdir "tests" include "dsp_base.inc" +test_main: lri $AR0, #0x001c lri $AR1, #0x001d lri $AR2, #0x001e diff --git a/Source/DSPSpy/tests/less_test.ds b/Source/DSPSpy/tests/less_test.ds index 1b75eaecdd..20be209b73 100644 --- a/Source/DSPSpy/tests/less_test.ds +++ b/Source/DSPSpy/tests/less_test.ds @@ -1,6 +1,7 @@ incdir "tests" include "dsp_base.inc" +test_main: CLR $acc0 CLR $acc1 LRI $ac0.h, #0x0050 diff --git a/Source/DSPSpy/tests/mul_test.ds b/Source/DSPSpy/tests/mul_test.ds index 3ec2e83f64..8f5ca9810f 100644 --- a/Source/DSPSpy/tests/mul_test.ds +++ b/Source/DSPSpy/tests/mul_test.ds @@ -2,8 +2,9 @@ incdir "tests" include "dsp_base.inc" -; Results is in capitails like this: UNSIGNED +; Results are in capitals like this: UNSIGNED +test_main: CLR15 ; Test MULXMVZ - SET15 diff --git a/Source/DSPSpy/tests/neg_test.ds b/Source/DSPSpy/tests/neg_test.ds index 22c4ba8481..57c1610511 100644 --- a/Source/DSPSpy/tests/neg_test.ds +++ b/Source/DSPSpy/tests/neg_test.ds @@ -2,6 +2,7 @@ incdir "tests" include "dsp_base.inc" +test_main: clr $ACC0 neg $ACC0 diff --git a/Source/DSPSpy/tests/op_test.ds b/Source/DSPSpy/tests/op_test.ds index 5b2ab76bc5..b77fc6525d 100644 --- a/Source/DSPSpy/tests/op_test.ds +++ b/Source/DSPSpy/tests/op_test.ds @@ -10,6 +10,7 @@ include "dsp_base.inc" // 0x02cb is the same opcode, but arithmetic. // We'll call it asrn, no arguments. 
+test_main: clr $ACC0 clr $ACC1 lri $AC0.H, #0 diff --git a/Source/DSPSpy/tests/reg_mask_test.ds b/Source/DSPSpy/tests/reg_mask_test.ds index 3bc0e9068b..39238d5b16 100644 --- a/Source/DSPSpy/tests/reg_mask_test.ds +++ b/Source/DSPSpy/tests/reg_mask_test.ds @@ -1,6 +1,7 @@ incdir "tests" include "dsp_base.inc" +test_main: ; Test what happens various values are written to every register LRI $ar0, #0xffff CALL set_all_regs diff --git a/Source/DSPSpy/tests/rti_test.ds b/Source/DSPSpy/tests/rti_test.ds new file mode 100644 index 0000000000..81acd6af47 --- /dev/null +++ b/Source/DSPSpy/tests/rti_test.ds @@ -0,0 +1,55 @@ +; This test needs to manually specify IRQs + jmp irq0 + jmp irq1 + jmp irq2 + jmp irq3 + jmp irq4 + jmp accov_irq + jmp irq6 + jmp irq7 + +incdir "tests" +include "dsp_base_noirq.inc" + +test_main: + ; Use the accelerator to generate an IRQ by setting the start and end address to 0 + ; This will result in an interrupt on every read + SI @0xffda, #0 ; pred_scale + SI @0xffdb, #0 ; yn1 + SI @0xffdc, #0 ; yn2 + SI @0xffd1, #0 ; SampleFormat + SI @ACSAH, #0 + SI @ACCAH, #0 + SI @ACSAL, #0 + SI @ACCAL, #0 + SI @ACEAH, #0 + SI @ACEAL, #0 + + + LRI $AX1.H, #0x0000 + LRS $AX0.L, @ARAM ; Trigger interrupt + CALL send_back + + LRI $AX1.H, #0x0001 + LRS $AX0.L, @ARAM ; Trigger interrupt + CALL send_back + + LRI $AX1.H, #0x0000 + LRS $AX0.L, @ARAM ; Trigger interrupt + CALL send_back + + jmp end_of_test + +accov_irq: + ; Restore registers, otherwise no new interrupt will be generated + SI @0xffda, #0 ; pred_scale + SI @0xffdb, #0 ; yn1 + SI @0xffdc, #0 ; yn2 + + TSTAXH $AX1.H + LRI $AX1.L, #0x1111 + cw 0x02f4 ; RTINZ if it exists + LRI $AX1.L, #0x2222 + cw 0x02f5 ; RTIZ if it exists + LRI $AX1.L, #0x3333 + RTI diff --git a/Source/DSPSpy/tests/srs_test.ds b/Source/DSPSpy/tests/srs_test.ds new file mode 100644 index 0000000000..38febf7f1a --- /dev/null +++ b/Source/DSPSpy/tests/srs_test.ds @@ -0,0 +1,131 @@ +incdir "tests" +include "dsp_base.inc" + +test_main: +; 
Test registers used by LRS and SRS + LRI $CR, #0x0000 + CALL clear_regs + CALL store_mem_sr + + ; Write with SR, read with LR + LRI $AR0, #0xA00A + CALL create_pattern + CALL store_mem_sr + CALL send_back + CALL clear_regs + CALL read_mem_lr + CALL send_back + + ; Write with SR, read with LRS + LRI $AR0, #0xB00B + CALL create_pattern + CALL store_mem_sr + CALL send_back + CALL clear_regs + CALL read_mem_lrs + CALL send_back + + ; Write with SRS, read with LR + LRI $AR0, #0xC00C + CALL create_pattern + CALL store_mem_srs + CALL send_back + CALL clear_regs + CALL read_mem_lr + CALL send_back + + ; Write with SRS, read with LRS + LRI $AR0, #0xD00D + CALL create_pattern + CALL store_mem_srs + CALL send_back + CALL clear_regs + CALL read_mem_lrs + CALL send_back + +; We're done, DO NOT DELETE THIS LINE + JMP end_of_test + +create_pattern: + LRI $IX0, #0x0110 + MRR $AX0.L, $AR0 + ADDARN $AR0, $IX0 + MRR $AX1.L, $AR0 + ADDARN $AR0, $IX0 + MRR $AX0.H, $AR0 + ADDARN $AR0, $IX0 + MRR $AX1.H, $AR0 + ADDARN $AR0, $IX0 + MRR $AC0.L, $AR0 + ADDARN $AR0, $IX0 + MRR $AC1.L, $AR0 + ADDARN $AR0, $IX0 + MRR $AC0.M, $AR0 + ADDARN $AR0, $IX0 + MRR $AC1.M, $AR0 + ADDARN $AR0, $IX0 + ; AC0.H and AC1.H have odd results since they're 8-bit sign-extended, but that's fine.
+ MRR $AC0.H, $AR0 + ADDARN $AR0, $IX0 + MRR $AC1.H, $AR0 + RET + +clear_regs: + LRI $AX0.L, #0x0000 + LRI $AX1.L, #0x0000 + LRI $AX0.H, #0x0000 + LRI $AX1.H, #0x0000 + LRI $AC0.L, #0x0000 + LRI $AC1.L, #0x0000 + LRI $AC0.M, #0x0000 + LRI $AC1.M, #0x0000 + LRI $AC0.H, #0x0000 + LRI $AC1.H, #0x0000 + RET + +read_mem_lr: + LR $AX0.L, @0x0000 + LR $AX1.L, @0x0001 + LR $AX0.H, @0x0002 + LR $AX1.H, @0x0003 + LR $AC0.L, @0x0004 + LR $AC1.L, @0x0005 + LR $AC0.M, @0x0006 + LR $AC1.M, @0x0007 + RET + +read_mem_lrs: + LRS $AX0.L, @0x00 + LRS $AX1.L, @0x01 + LRS $AX0.H, @0x02 + LRS $AX1.H, @0x03 + LRS $AC0.L, @0x04 + LRS $AC1.L, @0x05 + LRS $AC0.M, @0x06 + LRS $AC1.M, @0x07 + RET + +store_mem_sr: + SR @0x0000, $AX0.L + SR @0x0001, $AX1.L + SR @0x0002, $AX0.H + SR @0x0003, $AX1.H + SR @0x0004, $AC0.L + SR @0x0005, $AC1.L + SR @0x0006, $AC0.M + SR @0x0007, $AC1.M + RET + +store_mem_srs: + ; For future compatibility these have been changed to cw. + ; The way the instructions were originally encoded is commented, + ; but this does not match their behavior. 
+ cw 0x2800 ; SRS @0x00, $AX0.L - actually SRSH @0x00, $AC0.H + cw 0x2901 ; SRS @0x01, $AX1.L - actually SRSH @0x01, $AC1.H + cw 0x2A02 ; SRS @0x02, $AX0.H - actually unknown, no store performed + cw 0x2B03 ; SRS @0x03, $AX1.H - actually unknown, no store performed + cw 0x2C04 ; SRS @0x04, $AC0.L + cw 0x2D05 ; SRS @0x05, $AC1.L + cw 0x2E06 ; SRS @0x06, $AC0.M + cw 0x2F07 ; SRS @0x07, $AC1.M + RET diff --git a/Source/DSPSpy/tests/unk_regs_test.ds b/Source/DSPSpy/tests/unk_regs_test.ds index 54f4fa5a74..d4fd79c5ae 100644 --- a/Source/DSPSpy/tests/unk_regs_test.ds +++ b/Source/DSPSpy/tests/unk_regs_test.ds @@ -2,7 +2,7 @@ incdir "tests" include "dsp_base.inc" ; Reads regs from 0xFF80 to 0xFF8D and sends them back - +test_main: lr $AC0.M, @0xff80 call send_back diff --git a/docs/DSP/free_dsp_rom/dsp_rom.ds b/docs/DSP/free_dsp_rom/dsp_rom.ds index 7e845f8a02..7ee9b5c5d8 100644 --- a/docs/DSP/free_dsp_rom/dsp_rom.ds +++ b/docs/DSP/free_dsp_rom/dsp_rom.ds @@ -86,11 +86,11 @@ WARNPC 0x8b ORG 0x8b ; called by GBA ucode dram_to_cpu: - srs @DSMAH, $AX0.H - srs @DSMAL, $AX0.L + sr @DSMAH, $AX0.H + sr @DSMAL, $AX0.L si @DSCR, #0x1 - srs @DSPA, $AX1.H - srs @DSBL, $AX1.L + sr @DSPA, $AX1.H + sr @DSBL, $AX1.L call wait_dma+#IROM_BASE ret @@ -108,11 +108,11 @@ ORG 0xbc ; called by GBA ucode bootucode_ax: lris $AC0.M, #0 - srs @DSCR, $AC0.M - srs @DSMAH, $AX0.H - srs @DSMAL, $AX0.L - srs @DSPA, $AX1.H - srs @DSBL, $AX1.L + srs @DSCR, $AC0.M + sr @DSMAH, $AX0.H + sr @DSMAL, $AX0.L + sr @DSPA, $AX1.H + sr @DSBL, $AX1.L call wait_dma+#IROM_BASE bootucode_ix: @@ -145,9 +145,15 @@ ORG 0xe7 ; Returns: ; AX0.L is the value of the last sample from input 1 ; AX1.H is the value of the last sample from input 2 +; +; for i = 0..31: +; ar3[i] = ((ar2[i] << 16) + ar0[i] * ar1[0]) >> 16 +; for i = 0..31: +; ix1[i] = ((ix1[i] << 16) + ix0[i] * ar1[2]) >> 16 +; ax0.l = ar0[31] * ar1[0] +; ax1.h = ix0[31] * ar1[2] mix_two_add: call mix_add+#IROM_BASE - iar $AR1 mrr $AR0, $IX0 mrr $AR2, $IX1 mrr 
$AR3, $IX1 @@ -183,8 +189,14 @@ ORG 0x1f9 ; Returns: ; AX0.L is the value of the last sample ; AX1.H is the first address after the output +; +; for i = 0..31: +; ar3[i] = ((ar2[i] << 16) + ar0[i] * ar1[0]) >> 16 +; ax0.l = ar0[31] * ar1[0] +; ax1.h = ar3 + 32 // assuming ar3 is a s32 pointer mix_add: lrri $AX1.L, @$AR1 + iar $AR1 bloopi #32, ____mix_add_end_loop+#IROM_BASE lrri $AC0.M, @$AR2 lrri $AC0.L, @$AR2 @@ -203,6 +215,13 @@ ____mix_add_end_loop: WARNPC 0x282 ORG 0x282 +; for i = 0..31: +; ar3[i] = ar1[0] + i * ar1[1] +; ar2[i] = ((ar2[i] << 16) + ar0[i] * ar3[i]) >> 16 +; ar3[i+32] = ar1[2] + i * ar1[3] +; ix1[i] = ((ix1[i] << 16) + ix0[i] * ar3[i+32]) >> 16 +; ax0.l = ar0[31] * ar3[31] +; ax1.h = ix0[31] * ar3[63] mix_two_add_ramp: call mix_add_ramp+#IROM_BASE mrr $AR0, $IX0 @@ -227,6 +246,10 @@ sub_8458: WARNPC 0x45d ORG 0x45d +; for i = 0..31: +; ar3[i] = ar1[0] + i * ar1[1] +; ar2[i] = ((ar2[i] << 16) + ar0[i] * ar3[i]) >> 16 +; ax0.l = ar0[31] * ar3[31] mix_add_ramp: clr $ACC0 clr $ACC1 @@ -259,7 +282,6 @@ ____mix_add_ramp_end_loop: srri @$AR3, $AC0.L movp $ACC0 mrr $AX0.L, $AC0.M - mrr $AX1.H, $AR3 mrr $AR1, $IX3 mrr $AR3, $IX2 ret diff --git a/docs/DSP/free_dsp_rom/dsp_rom_readme.txt b/docs/DSP/free_dsp_rom/dsp_rom_readme.txt index 51ee21eba3..21601a16a8 100644 --- a/docs/DSP/free_dsp_rom/dsp_rom_readme.txt +++ b/docs/DSP/free_dsp_rom/dsp_rom_readme.txt @@ -1,64 +1,75 @@ -Legal GC/WII DSP IROM replacement (v0.3.1) -------------------------------------------------------- - -- irom: When running from the ROM entrypoint, skip the bootucode_ax branch - of the bootucode procedure. Since the ROM doesn't set any of the AX - registers, it could cause bad DMA transfers and crashes. - -ligfx -10/aug/2017 - -Legal GC/WII DSP IROM replacement (v0.3) -------------------------------------------------------- - -- coef: Explicitly set 23 different values that are used by GBA UCode, and - tweaked overall parameters to more closely match those 23 values. 
-- irom: Moved a few functions to their proper places, updated BootUCode to - configure DMA transfers using AX registers as well as IX registers (the GBA - UCode uses this to do two sequential transfers in one call), and added - partial functions used by GBA UCode. - -ligfx -2/june/2017 - -Legal GC/WII DSP IROM replacement (v0.2.1) -------------------------------------------------------- - -- coef: 4-tap polyphase FIR filters -- irom: unchanged - -Coefficients are roughly equivalent to those in the official DROM. -Improves resampling quality greatly over linear interpolation. -See generate_coefs.py for details. - -stgn -29/june/2015 - -Legal GC/WII DSP IROM replacement (v0.2) -------------------------------------------------------- - -- coef: crafted to use a linear interpolation when resampling (instead of - having a real 4 TAP FIR filter) -- irom: added all the mixing functions, some functions not used by AX/Zelda are - still missing - -Should work with all AX, AXWii and Zelda UCode games. Card/IPL/GBA are most -likely still broken with it and require a real DSP ROM. - -delroth -16/march/2013 - -Legal GC/WII DSP IROM replacement (v0.1) -------------------------------------------------------- -- coef: fake (zeroes) -- irom: reversed and rewrote ucode loading/reset part, everything else is missing - -Good enough for Zelda ucode games (and maybe some AX too): -- WII: SMG 1/2, Pikmin 1/2 WII, Zelda TP WII, Donkey Kong Jungle Beat (WII), ... -- GC: Mario Kart Double Dash, Luigi Mansion, Super Mario Sunshine, Pikmin 1/2, Zelda WW, Zelda TP, ... - -Basically... If game is not using coef and irom mixing functions it will work ok. -Dolphin emulator will report wrong CRCs, but it will work ok with mentioned games. 
- -LM -31/july/2011 +Legal GC/WII DSP IROM replacement (v0.4) (0xe789b5a5, 0xa4a575f5) +------------------------------------------------------- + +- irom: Minor accuracy and documentation improvements +- irom: Remove use of SRS instruction with AX registers, as those instructions + do not actually exist + +Tilka, Pokechu22 +17/aug/2021 + +Legal GC/WII DSP IROM replacement (v0.3.1) (0x128ea7a2, 0xa4a575f5) +------------------------------------------------------- + +- irom: When running from the ROM entrypoint, skip the bootucode_ax branch + of the bootucode procedure. Since the ROM doesn't set any of the AX + registers, it could cause bad DMA transfers and crashes. + +ligfx +10/aug/2017 + +Legal GC/WII DSP IROM replacement (v0.3) (0x3aa4a793, 0xa4a575f5) +------------------------------------------------------- + +- coef: Explicitly set 23 different values that are used by GBA UCode, and + tweaked overall parameters to more closely match those 23 values. +- irom: Moved a few functions to their proper places, updated BootUCode to + configure DMA transfers using AX registers as well as IX registers (the GBA + UCode uses this to do two sequential transfers in one call), and added + partial functions used by GBA UCode. + +ligfx +2/june/2017 + +Legal GC/WII DSP IROM replacement (v0.2.1) (0xd9907f71, 0xdb6880c1) +------------------------------------------------------- + +- coef: 4-tap polyphase FIR filters +- irom: unchanged + +Coefficients are roughly equivalent to those in the official DROM. +Improves resampling quality greatly over linear interpolation. +See generate_coefs.py for details. 
+ +stgn +29/june/2015 + +Legal GC/WII DSP IROM replacement (v0.2) (0xd9907f71, 0xb019c2fb) +------------------------------------------------------- + +- coef: crafted to use a linear interpolation when resampling (instead of + having a real 4 TAP FIR filter) +- irom: added all the mixing functions, some functions not used by AX/Zelda are + still missing + +Should work with all AX, AXWii and Zelda UCode games. Card/IPL/GBA are most +likely still broken with it and require a real DSP ROM. + +delroth +16/march/2013 + +Legal GC/WII DSP IROM replacement (v0.1) (0x9c8f593c, 0x10000001) +------------------------------------------------------- + +- coef: fake (zeroes) +- irom: reversed and rewrote ucode loading/reset part, everything else is missing + +Good enough for Zelda ucode games (and maybe some AX too): +- WII: SMG 1/2, Pikmin 1/2 WII, Zelda TP WII, Donkey Kong Jungle Beat (WII), ... +- GC: Mario Kart Double Dash, Luigi Mansion, Super Mario Sunshine, Pikmin 1/2, Zelda WW, Zelda TP, ... + +Basically... If game is not using coef and irom mixing functions it will work ok. +Dolphin emulator will report wrong CRCs, but it will work ok with mentioned games. + +LM +31/july/2011