diff --git a/src/ARM.cpp b/src/ARM.cpp index b7b703da..beefc132 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -222,7 +222,7 @@ void ARM::DoSavestate(Savestate* file) file->VarArray(R_ABT, 3*sizeof(u32)); file->VarArray(R_IRQ, 3*sizeof(u32)); file->VarArray(R_UND, 3*sizeof(u32)); - file->Var32(&CurInstr); + file->Var64(&CurInstr); #ifdef JIT_ENABLED if (file->Saving && NDS.IsJITEnabled()) { @@ -232,7 +232,7 @@ void ARM::DoSavestate(Savestate* file) FillPipeline(); } #endif - file->VarArray(NextInstr, 2*sizeof(u32)); + file->VarArray(NextInstr, 2*sizeof(u64)); file->Var32(&ExceptionBase); @@ -344,12 +344,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) CPSR &= ~0x20; } - if (!(PU_Map[addr>>12] & 0x04)) - { - PrefetchAbort(); - return; - } - NDS.MonitorARM9Jump(addr); } @@ -518,6 +512,7 @@ void ARM::UpdateMode(u32 oldmode, u32 newmode, bool phony) } } +template void ARM::TriggerIRQ() { if (CPSR & 0x80) @@ -529,7 +524,12 @@ void ARM::TriggerIRQ() UpdateMode(oldcpsr, CPSR); R_IRQ[2] = oldcpsr; - R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); +#ifdef JIT_ENABLED + if constexpr (mode == CPUExecuteMode::JIT) + R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); + else +#endif + R[14] = R[15] - (oldcpsr & 0x20 ? 0 : 4); JumpTo(ExceptionBase + 0x18); // ARDS cheat support @@ -540,6 +540,11 @@ void ARM::TriggerIRQ() NDS.AREngine.RunCheats(); } } +template void ARM::TriggerIRQ(); +template void ARM::TriggerIRQ(); +#ifdef JIT_ENABLED +template void ARM::TriggerIRQ(); +#endif void ARMv5::PrefetchAbort() { @@ -550,17 +555,8 @@ void ARMv5::PrefetchAbort() CPSR |= 0x97; UpdateMode(oldcpsr, CPSR); - // this shouldn't happen, but if it does, we're stuck in some nasty endless loop - // so better take care of it - if (!(PU_Map[ExceptionBase>>12] & 0x04)) - { - Log(LogLevel::Error, "!!!!! EXCEPTION REGION NOT EXECUTABLE. THIS IS VERY BAD!!\n"); - NDS.Stop(Platform::StopReason::BadExceptionRegion); - return; - } - R_ABT[2] = oldcpsr; - R[14] = R[15] + (oldcpsr & 0x20 ? 
2 : 0); + R[14] = R[15] - (oldcpsr & 0x20 ? 0 : 4); JumpTo(ExceptionBase + 0x0C); } @@ -599,7 +595,13 @@ void ARMv5::Execute() { Halted = 0; if (NDS.IME[0] & 0x1) - TriggerIRQ(); + { +#ifdef JIT_ENABLED + if constexpr (mode == CPUExecuteMode::JIT) TriggerIRQ(); + else +#endif + IRQ = 1; + } } else { @@ -634,7 +636,7 @@ void ARMv5::Execute() { // this order is crucial otherwise idle loops waiting for an IRQ won't function if (IRQ) - TriggerIRQ(); + TriggerIRQ(); if (Halted || IdleLoop) { @@ -662,10 +664,18 @@ void ARMv5::Execute() NextInstr[0] = NextInstr[1]; if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 0; } else NextInstr[1] = CodeRead32(R[15], false); - - // actually execute - u32 icode = (CurInstr >> 6) & 0x3FF; - ARMInterpreter::THUMBInstrTable[icode](this); + + + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions + { + PrefetchAbort(); + } + else [[likely]] // actually execute + { + u32 icode = (CurInstr >> 6) & 0x3FF; + ARMInterpreter::THUMBInstrTable[icode](this); + } } else { @@ -677,9 +687,14 @@ void ARMv5::Execute() CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead32(R[15], false); + - // actually execute - if (CheckCondition(CurInstr >> 28)) + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions + { + PrefetchAbort(); + } + else if (CheckCondition(CurInstr >> 28)) [[likely]] // actually execute { u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); ARMInterpreter::ARMInstrTable[icode](this); @@ -688,6 +703,10 @@ void ARMv5::Execute() { ARMInterpreter::A_BLX_IMM(this); } + else if ((CurInstr & 0x0FF000F0) == 0x01200070) + { + ARMInterpreter::A_BKPT(this); // always passes regardless of condition code + } else AddCycles_C(); } @@ -704,10 +723,8 @@ void ARMv5::Execute() /*if (NDS::IF[0] & NDS::IE[0]) { if (NDS::IME[0] & 0x1) - TriggerIRQ(); + TriggerIRQ(); }*/ - if (IRQ) 
TriggerIRQ(); - } NDS.ARM9Timestamp += Cycles; @@ -739,7 +756,10 @@ void ARMv4::Execute() { Halted = 0; if (NDS.IME[1] & 0x1) - TriggerIRQ(); + { + if constexpr (mode == CPUExecuteMode::JIT) TriggerIRQ(); + else IRQ = 1; + } } else { @@ -773,7 +793,7 @@ void ARMv4::Execute() if (StopExecution) { if (IRQ) - TriggerIRQ(); + TriggerIRQ(); if (Halted || IdleLoop) { @@ -801,9 +821,13 @@ void ARMv4::Execute() NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead16(R[15]); - // actually execute - u32 icode = (CurInstr >> 6); - ARMInterpreter::THUMBInstrTable[icode](this); + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else + { + // actually execute + u32 icode = (CurInstr >> 6); + ARMInterpreter::THUMBInstrTable[icode](this); + } } else { @@ -816,8 +840,8 @@ void ARMv4::Execute() NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead32(R[15]); - // actually execute - if (CheckCondition(CurInstr >> 28)) + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else if (CheckCondition(CurInstr >> 28)) // actually execute { u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); ARMInterpreter::ARMInstrTable[icode](this); @@ -838,9 +862,8 @@ void ARMv4::Execute() /*if (NDS::IF[1] & NDS::IE[1]) { if (NDS::IME[1] & 0x1) - TriggerIRQ(); + TriggerIRQ(); }*/ - if (IRQ) TriggerIRQ(); } NDS.ARM7Timestamp += Cycles; @@ -1113,70 +1136,78 @@ u32 ARMv5::ReadMem(u32 addr, int size) } #endif -void ARMv4::DataRead8(u32 addr, u32* val) +bool ARMv4::DataRead8(u32 addr, u32* val) { *val = BusRead8(addr); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; + return true; } -void ARMv4::DataRead16(u32 addr, u32* val) +bool ARMv4::DataRead16(u32 addr, u32* val) { addr &= ~1; *val = BusRead16(addr); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; + return true; } -void ARMv4::DataRead32(u32 addr, u32* val) +bool ARMv4::DataRead32(u32 addr, u32* val) { addr &= ~3; *val = BusRead32(addr); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][2]; + return true; } 
-void ARMv4::DataRead32S(u32 addr, u32* val) +bool ARMv4::DataRead32S(u32 addr, u32* val) { addr &= ~3; *val = BusRead32(addr); DataCycles += NDS.ARM7MemTimings[addr >> 15][3]; + return true; } -void ARMv4::DataWrite8(u32 addr, u8 val) +bool ARMv4::DataWrite8(u32 addr, u8 val) { BusWrite8(addr, val); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; + return true; } -void ARMv4::DataWrite16(u32 addr, u16 val) +bool ARMv4::DataWrite16(u32 addr, u16 val) { addr &= ~1; BusWrite16(addr, val); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; + return true; } -void ARMv4::DataWrite32(u32 addr, u32 val) +bool ARMv4::DataWrite32(u32 addr, u32 val) { addr &= ~3; BusWrite32(addr, val); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][2]; + return true; } -void ARMv4::DataWrite32S(u32 addr, u32 val) +bool ARMv4::DataWrite32S(u32 addr, u32 val) { addr &= ~3; BusWrite32(addr, val); DataCycles += NDS.ARM7MemTimings[addr >> 15][3]; + return true; } diff --git a/src/ARM.h b/src/ARM.h index b652e74d..f4b3b53f 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -128,19 +128,20 @@ public: void UpdateMode(u32 oldmode, u32 newmode, bool phony = false); + template void TriggerIRQ(); void SetupCodeMem(u32 addr); - virtual void DataRead8(u32 addr, u32* val) = 0; - virtual void DataRead16(u32 addr, u32* val) = 0; - virtual void DataRead32(u32 addr, u32* val) = 0; - virtual void DataRead32S(u32 addr, u32* val) = 0; - virtual void DataWrite8(u32 addr, u8 val) = 0; - virtual void DataWrite16(u32 addr, u16 val) = 0; - virtual void DataWrite32(u32 addr, u32 val) = 0; - virtual void DataWrite32S(u32 addr, u32 val) = 0; + virtual bool DataRead8(u32 addr, u32* val) = 0; + virtual bool DataRead16(u32 addr, u32* val) = 0; + virtual bool DataRead32(u32 addr, u32* val) = 0; + virtual bool DataRead32S(u32 addr, u32* val) = 0; + virtual bool DataWrite8(u32 addr, u8 val) = 0; + virtual bool DataWrite16(u32 addr, u16 val) = 0; + virtual bool DataWrite32(u32 addr, u32 
val) = 0; + virtual bool DataWrite32S(u32 addr, u32 val) = 0; virtual void AddCycles_C() = 0; virtual void AddCycles_CI(s32 numI) = 0; @@ -176,8 +177,8 @@ public: u32 R_ABT[3]; u32 R_IRQ[3]; u32 R_UND[3]; - u32 CurInstr; - u32 NextInstr[2]; + u64 CurInstr; + u64 NextInstr[2]; u32 ExceptionBase; @@ -250,16 +251,16 @@ public: void Execute(); // all code accesses are forced nonseq 32bit - u32 CodeRead32(u32 addr, bool branch); + u64 CodeRead32(u32 addr, bool branch); - void DataRead8(u32 addr, u32* val) override; - void DataRead16(u32 addr, u32* val) override; - void DataRead32(u32 addr, u32* val) override; - void DataRead32S(u32 addr, u32* val) override; - void DataWrite8(u32 addr, u8 val) override; - void DataWrite16(u32 addr, u16 val) override; - void DataWrite32(u32 addr, u32 val) override; - void DataWrite32S(u32 addr, u32 val) override; + bool DataRead8(u32 addr, u32* val) override; + bool DataRead16(u32 addr, u32* val) override; + bool DataRead32(u32 addr, u32* val) override; + bool DataRead32S(u32 addr, u32* val) override; + bool DataWrite8(u32 addr, u8 val) override; + bool DataWrite16(u32 addr, u16 val) override; + bool DataWrite32(u32 addr, u32 val) override; + bool DataWrite32S(u32 addr, u32 val) override; void AddCycles_C() override { @@ -399,18 +400,19 @@ public: return BusRead32(addr); } - void DataRead8(u32 addr, u32* val) override; - void DataRead16(u32 addr, u32* val) override; - void DataRead32(u32 addr, u32* val) override; - void DataRead32S(u32 addr, u32* val) override; - void DataWrite8(u32 addr, u8 val) override; - void DataWrite16(u32 addr, u16 val) override; - void DataWrite32(u32 addr, u32 val) override; - void DataWrite32S(u32 addr, u32 val) override; + bool DataRead8(u32 addr, u32* val) override; + bool DataRead16(u32 addr, u32* val) override; + bool DataRead32(u32 addr, u32* val) override; + bool DataRead32S(u32 addr, u32* val) override; + bool DataWrite8(u32 addr, u8 val) override; + bool DataWrite16(u32 addr, u16 val) override; + bool 
DataWrite32(u32 addr, u32 val) override; + bool DataWrite32S(u32 addr, u32 val) override; void AddCycles_C() override; void AddCycles_CI(s32 num) override; void AddCycles_CDI() override; void AddCycles_CD() override; + protected: u8 BusRead8(u32 addr) override; u16 BusRead16(u32 addr) override; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index f5bf7713..ff79597e 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -69,6 +69,14 @@ void T_UNK(ARM* cpu) cpu->JumpTo(cpu->ExceptionBase + 0x04); } +void A_BKPT(ARM* cpu) +{ + if (cpu->Num == 1) return A_UNK(cpu); // checkme: ARM7 takes the undefined path only; returning keeps it from also running the ARMv5 prefetch abort below + + Log(LogLevel::Warn, "BKPT: "); // combine with the prefetch abort warning message + ((ARMv5*)cpu)->PrefetchAbort(); +} + void A_MSR_IMM(ARM* cpu) @@ -90,7 +98,8 @@ void A_MSR_IMM(ARM* cpu) case 0x1A: case 0x1B: psr = &cpu->R_UND[2]; break; default: - cpu->AddCycles_C(); + if (cpu->Num == 1) cpu->AddCycles_C(); // arm 7 + else cpu->AddCycles_CI(2); // arm 9 return; } } @@ -101,12 +110,9 @@ void A_MSR_IMM(ARM* cpu) u32 mask = 0; if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF; - if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; - if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; - if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000; - - if (!(cpu->CurInstr & (1<<22))) - mask &= 0xFFFFFFDF; + //if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; // unused by arm 7 & 9 + //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9 + if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000); if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00; @@ -121,7 +127,26 @@ void A_MSR_IMM(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - cpu->AddCycles_C(); + if (cpu->CPSR & 0x20) [[unlikely]] + { + if (cpu->Num == 0) cpu->R[15] += 2; // pc should actually increment by 4 one more time after switching to thumb mode without a pipeline flush, this gets the same effect. 
+ else + { + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR IMM T bit change on ARM7\n"); + cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least + } + } + + if (cpu->Num != 1) + { + if (cpu->CurInstr & (1<<22)) + { + cpu->AddCycles_CI(2); // spsr + } + else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc + else cpu->AddCycles_C(); + } + else cpu->AddCycles_C(); } void A_MSR_REG(ARM* cpu) @@ -143,7 +168,8 @@ void A_MSR_REG(ARM* cpu) case 0x1A: case 0x1B: psr = &cpu->R_UND[2]; break; default: - cpu->AddCycles_C(); + if (cpu->Num == 1) cpu->AddCycles_C(); // arm 7 + else cpu->AddCycles_CI(2); // arm 9 return; } } @@ -154,12 +180,9 @@ void A_MSR_REG(ARM* cpu) u32 mask = 0; if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF; - if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; - if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; - if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000; - - if (!(cpu->CurInstr & (1<<22))) - mask &= 0xFFFFFFDF; + //if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; // unused by arm 7 & 9 + //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9 + if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000); if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00; @@ -174,7 +197,26 @@ void A_MSR_REG(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - cpu->AddCycles_C(); + if (cpu->CPSR & 0x20) [[unlikely]] + { + if (cpu->Num == 0) cpu->R[15] += 2; // pc should actually increment by 4 one more time after switching to thumb mode without a pipeline flush, this gets the same effect. 
+ else + { + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); + cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least + } + } + + if (cpu->Num != 1) + { + if (cpu->CurInstr & (1<<22)) + { + cpu->AddCycles_CI(2); // spsr + } + else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc + else cpu->AddCycles_C(); + } + else cpu->AddCycles_C(); } void A_MRS(ARM* cpu) @@ -201,8 +243,15 @@ void A_MRS(ARM* cpu) else psr = cpu->CPSR; - cpu->R[(cpu->CurInstr>>12) & 0xF] = psr; - cpu->AddCycles_C(); + if (((cpu->CurInstr>>12) & 0xF) == 15) + { + if (cpu->Num == 1) // doesn't seem to jump on the arm9? checkme + cpu->JumpTo(psr & ~0x1); // checkme: this shouldn't be able to switch to thumb? + } + else cpu->R[(cpu->CurInstr>>12) & 0xF] = psr; + + if (cpu->Num != 1) cpu->AddCycles_CI(1); // arm9 + else cpu->AddCycles_C(); // arm7 } @@ -216,10 +265,12 @@ void A_MCR(ARM* cpu) u32 cn = (cpu->CurInstr >> 16) & 0xF; u32 cm = cpu->CurInstr & 0xF; u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; + u32 val = cpu->R[(cpu->CurInstr>>12)&0xF]; + if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4; if (cpu->Num==0 && cp==15) { - ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, cpu->R[(cpu->CurInstr>>12)&0xF]); + ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, val); } else if (cpu->Num==1 && cp==14) { @@ -244,10 +295,17 @@ void A_MRC(ARM* cpu) u32 cn = (cpu->CurInstr >> 16) & 0xF; u32 cm = cpu->CurInstr & 0xF; u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; + u32 rd = (cpu->CurInstr>>12) & 0xF; if (cpu->Num==0 && cp==15) { - cpu->R[(cpu->CurInstr>>12)&0xF] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo); + if (rd != 15) cpu->R[rd] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo); + else + { + // r15 updates the top 4 bits of the cpsr, done to "allow for conditional branching based on coprocessor status" + u32 flags = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo) & 0xF0000000; + cpu->CPSR = (cpu->CPSR & ~0xF0000000) | flags; + } } else if 
(cpu->Num==1 && cp==14) { @@ -259,12 +317,13 @@ void A_MRC(ARM* cpu) return A_UNK(cpu); // TODO: check what kind of exception it really is } - cpu->AddCycles_CI(2 + 1); // TODO: checkme + if (cpu->Num != 1) cpu->AddCycles_CI(1); // checkme + else cpu->AddCycles_CI(2 + 1); // TODO: checkme } -void A_SVC(ARM* cpu) +void A_SVC(ARM* cpu) // A_SWI { u32 oldcpsr = cpu->CPSR; cpu->CPSR &= ~0xBF; @@ -276,7 +335,7 @@ void A_SVC(ARM* cpu) cpu->JumpTo(cpu->ExceptionBase + 0x08); } -void T_SVC(ARM* cpu) +void T_SVC(ARM* cpu) // T_SWI { u32 oldcpsr = cpu->CPSR; cpu->CPSR &= ~0xBF; diff --git a/src/ARMInterpreter.h b/src/ARMInterpreter.h index 1066ac69..4c5ddafe 100644 --- a/src/ARMInterpreter.h +++ b/src/ARMInterpreter.h @@ -36,6 +36,7 @@ void A_MRS(ARM* cpu); void A_MCR(ARM* cpu); void A_MRC(ARM* cpu); void A_SVC(ARM* cpu); +void A_BKPT(ARM* cpu); void T_SVC(ARM* cpu); diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 167e184e..72992f0f 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -19,6 +19,7 @@ #include #include "ARM.h" #include "NDS.h" +#include "ARMInterpreter_MultiplySuperLLE.h" namespace melonDS::ARMInterpreter { @@ -581,8 +582,27 @@ A_IMPLEMENT_ALU_OP(RSC,) #define A_TST(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ - cpu->SetNZ(res & 0x80000000, \ - !res); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ + { \ + if (cpu->Num == 1) \ + { \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + u32 oldpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. 
*/ \ + if (cpu->CPSR & 0x20) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST T bit change on ARM7\n"); \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ + } \ + } \ + else cpu->JumpTo(res & ~1, true); /* TSTP dna, doesn't update flags */ \ + } \ + else \ + { \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(TST,_S) @@ -591,8 +611,27 @@ A_IMPLEMENT_ALU_TEST(TST,_S) #define A_TEQ(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ - cpu->SetNZ(res & 0x80000000, \ - !res); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ + { \ + if (cpu->Num == 1) \ + { \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + u32 oldpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + if (cpu->CPSR & 0x20) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ T bit change on ARM7\n"); \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ + } \ + } \ + else cpu->JumpTo(res & ~1, true); /* TEQP dna, doesn't update flags */ \ + } \ + else \ + { \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ + } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(TEQ,_S) @@ -601,10 +640,31 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) #define A_CMP(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ - cpu->SetNZCV(res & 0x80000000, \ - !res, \ - CarrySub(a, b), \ - OverflowSub(a, b)); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ + { \ + if (cpu->Num == 1) \ + { \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CarrySub(a, b), \ + OverflowSub(a, b)); \ + u32 oldpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does 
___not___ flush the pipeline. */ \ + if (cpu->CPSR & 0x20) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP T bit change on ARM7\n"); \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ + } \ + } \ + else cpu->JumpTo(res & ~1, true); /* CMPP dna, doesn't update flags */ \ + } \ + else \ + { \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CarrySub(a, b), \ + OverflowSub(a, b)); \ + } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(CMP,) @@ -613,10 +673,31 @@ A_IMPLEMENT_ALU_TEST(CMP,) #define A_CMN(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ - cpu->SetNZCV(res & 0x80000000, \ - !res, \ - CarryAdd(a, b), \ - OverflowAdd(a, b)); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ + { \ + if (cpu->Num == 1) \ + { \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CarryAdd(a, b), \ + OverflowAdd(a, b)); \ + u32 oldpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. 
*/ \ + if (cpu->CPSR & 0x20) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN T bit change on ARM7\n"); \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ + } \ + } \ + else cpu->JumpTo(res & ~1, true); /* CMNP dna, doesn't update flags */ \ + } \ + else \ + { \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CarryAdd(a, b), \ + OverflowAdd(a, b)); \ + } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(CMN,) @@ -766,12 +847,14 @@ void A_MUL(ARM* cpu) u32 res = rm * rs; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + // all multiply instructions fail writes to r15 on arm7/9 + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + if (cpu->CurInstr & (1<<20)) { cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -783,6 +866,7 @@ void A_MUL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3; else cycles = 4; + if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, 0, cycles==4)); } cpu->AddCycles_CI(cycles); @@ -795,13 +879,14 @@ void A_MLA(ARM* cpu) u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; u32 res = (rm * rs) + rn; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (cpu->CurInstr & (1<<20)) { cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -813,6 +898,7 @@ void A_MLA(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, rn, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -825,24 +911,27 @@ void A_UMULL(ARM* cpu) u64 res = (u64)rm * (u64)rs; - cpu->R[(cpu->CurInstr 
>> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; else { if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(0, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -857,25 +946,28 @@ void A_UMLAL(ARM* cpu) u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); res += rd; + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); if (cpu->CurInstr & (1<<20)) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + cycles = (cpu->CurInstr & (1<<20)) ? 
4 : 2; else { if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(rd, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -887,25 +979,28 @@ void A_SMULL(ARM* cpu) u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; s64 res = (s64)(s32)rm * (s64)(s32)rs; + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); if (cpu->CurInstr & (1<<20)) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; else { if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(0, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -920,25 +1015,28 @@ void A_SMLAL(ARM* cpu) s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); res += rd; + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); if (cpu->CurInstr & (1<<20)) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & 
(1<<20)) ? 3 : 1; + cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; else { if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(rd, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -959,8 +1057,10 @@ void A_SMLAxy(ARM* cpu) u32 res_mul = ((s16)rm * (s16)rs); u32 res = res_mul + rn; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; @@ -980,8 +1080,9 @@ void A_SMLAWy(ARM* cpu) u32 res_mul = ((s64)(s32)rm * (s16)rs) >> 16; u32 res = res_mul + rn; - - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; @@ -1001,8 +1102,9 @@ void A_SMULxy(ARM* cpu) else rs &= 0xFFFF; u32 res = ((s16)rm * (s16)rs); - - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? } @@ -1017,8 +1119,9 @@ void A_SMULWy(ARM* cpu) else rs &= 0xFFFF; u32 res = ((s64)(s32)rm * (s16)rs) >> 16; - - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? 
} @@ -1039,8 +1142,11 @@ void A_SMLALxy(ARM* cpu) s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); res += rd; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); cpu->AddCycles_CI(1); // TODO: interlock?? } @@ -1067,7 +1173,8 @@ void A_CLZ(ARM* cpu) val |= 0x1; } - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1); + else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->AddCycles_C(); } @@ -1085,7 +1192,10 @@ void A_QADD(ARM* cpu) cpu->CPSR |= 0x08000000; } - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + // all saturated math instructions fail writes to r15 + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + cpu->AddCycles_C(); // TODO: interlock?? } @@ -1102,8 +1212,10 @@ void A_QSUB(ARM* cpu) res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000; cpu->CPSR |= 0x08000000; } + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? } @@ -1128,8 +1240,10 @@ void A_QDADD(ARM* cpu) res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000; cpu->CPSR |= 0x08000000; } + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? } @@ -1154,8 +1268,10 @@ void A_QDSUB(ARM* cpu) res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000; cpu->CPSR |= 0x08000000; } + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? 
} @@ -1460,18 +1576,18 @@ void T_MUL_REG(ARM* cpu) cpu->SetNZ(res & 0x80000000, !res); - s32 cycles = 0; + s32 cycles; if (cpu->Num == 0) { - cycles += 3; + cycles = 3; } else { - cpu->SetC(0); // carry flag destroyed, they say. whatever that means... - if (a & 0xFF000000) cycles += 4; - else if (a & 0x00FF0000) cycles += 3; - else if (a & 0x0000FF00) cycles += 2; - else cycles += 1; + if ((a & 0xFFFFFF00) == 0x00000000 || (a & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; + else if ((a & 0xFFFF0000) == 0x00000000 || (a & 0xFFFF0000) == 0xFFFF0000) cycles = 2; + else if ((a & 0xFF000000) == 0x00000000 || (a & 0xFF000000) == 0xFF000000) cycles = 3; + else cycles = 4; + cpu->SetC(MULSCarry(b, a, 0, cycles==4)); // carry flag destroyed, they say. whatever that means... } cpu->AddCycles_CI(cycles); } @@ -1534,6 +1650,18 @@ void T_CMP_HIREG(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); + + if ((cpu->Num == 1) && (rd == 15)) + { + u32 oldpsr = cpu->CPSR; + cpu->RestoreCPSR(); // ARM7TDMI restores cpsr and does ___not___ flush the pipeline. + if (!(cpu->CPSR & 0x20)) + { + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP HIREG T bit change on ARM7\n"); + cpu->CPSR |= 0x20; // keep it from crashing the emulator at least + } + } + cpu->AddCycles_C(); } diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp index 623be41a..5731a0b6 100644 --- a/src/ARMInterpreter_Branch.cpp +++ b/src/ARMInterpreter_Branch.cpp @@ -104,6 +104,9 @@ void T_BL_LONG_1(ARM* cpu) void T_BL_LONG_2(ARM* cpu) { + if ((cpu->CurInstr & 0x1801) == 0x0801) // "BLX" with bit 0 set is an undefined instruction. 
+ return T_UNK(cpu); // TODO: Check ARM7 for exceptions + s32 offset = (cpu->CurInstr & 0x7FF) << 1; u32 pc = cpu->R[14] + offset; cpu->R[14] = (cpu->R[15] - 2) | 1; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index f7c24312..a2c9d7cc 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -58,89 +58,168 @@ namespace melonDS::ARMInterpreter shiftop(offset, shift); \ if (!(cpu->CurInstr & (1<<23))) offset = -offset; +enum class Writeback +{ + None = 0, + Pre, + Post, + Trans, +}; + +template +void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset) +{ + static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); + + u32 addr; + if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn]; + else addr = cpu->R[rn]; + + if constexpr (writeback == Writeback::Trans) + { + if (cpu->Num == 0) + ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap; + } + + u32 val; + bool dabort; + if constexpr (size == 8) dabort = !cpu->DataRead8 (addr, &val); + if constexpr (size == 16) dabort = !cpu->DataRead16(addr, &val); + if constexpr (size == 32) dabort = !cpu->DataRead32(addr, &val); + + if constexpr (writeback == Writeback::Trans) + { + if (cpu->Num == 0 && (cpu->CPSR & 0x1F) != 0x10) + ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_PrivMap; + } + + cpu->AddCycles_CDI(); + if (dabort) [[unlikely]] + { + ((ARMv5*)cpu)->DataAbort(); + return; + } + + if constexpr (size == 8 && signextend) val = (s32)(s8)val; + + if constexpr (size == 16) + { + if (cpu->Num == 1) + { + val = ROR(val, ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7 + if constexpr (signextend) val = (s32)((addr&0x1) ? (s8)val : (s16)val); // sign extend like a ldrsb if we ror'd the value. 
+ } + else if constexpr (signextend) val = (s32)(s16)val; + } + + if constexpr (size == 32) val = ROR(val, ((addr&0x3)<<3)); + + + if constexpr (writeback >= Writeback::Post) addr += offset; + if constexpr (writeback != Writeback::None) + { + if (rn != 15) [[likely]] // r15 writeback fails on arm9 + { + cpu->R[rn] = addr; + } + else if (cpu->Num == 1) // arm 7 + { + // note that at no point does it actually write the value it loaded to a register... + cpu->JumpTo((addr+4) & ~1); + return; + } + } + + if (rd == 15) + { + if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1; + cpu->JumpTo(val); + } + else cpu->R[rd] = val; +} + +template +void StoreSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset) +{ + static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); + + u32 addr; + if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn]; + else addr = cpu->R[rn]; + + u32 storeval = cpu->R[rd]; + if (rd == 15) storeval += 4; + + if constexpr (writeback == Writeback::Trans) + { + if (cpu->Num == 0) + ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap; + } + + bool dabort; + if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval); + if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval); + if constexpr (size == 32) dabort = !cpu->DataWrite32(addr, storeval); + + if constexpr (writeback == Writeback::Trans) + { + if (cpu->Num == 0 && (cpu->CPSR & 0x1F) != 0x10) + ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_PrivMap; + } + + cpu->AddCycles_CD(); + if (dabort) [[unlikely]] + { + ((ARMv5*)cpu)->DataAbort(); + return; + } + + if constexpr (writeback >= Writeback::Post) addr += offset; + if constexpr (writeback != Writeback::None) + { + if (rn != 15) [[likely]] // r15 writeback fails on arm9 + { + cpu->R[rn] = addr; + } + else if (cpu->Num == 1) // arm 7 + { + cpu->JumpTo(addr & ~1); + } + } +} #define A_STR \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 
storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ - storeval += 4; \ - cpu->DataWrite32(offset, storeval); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->AddCycles_CD(); + if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); -// TODO: user mode (bit21) #define A_STR_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ - storeval += 4; \ - cpu->DataWrite32(addr, storeval); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->AddCycles_CD(); + if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<32, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_STRB \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite8(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->AddCycles_CD(); + if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); -// TODO: user mode (bit21) #define A_STRB_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite8(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->AddCycles_CD(); + if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), 
((cpu->CurInstr>>16) & 0xF), offset); #define A_LDR \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; cpu->DataRead32(offset, &val); \ - val = ROR(val, ((offset&0x3)<<3)); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->AddCycles_CDI(); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - { \ - if (cpu->Num==1) val &= ~0x1; \ - cpu->JumpTo(val); \ - } \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - } + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); -// TODO: user mode #define A_LDR_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; cpu->DataRead32(addr, &val); \ - val = ROR(val, ((addr&0x3)<<3)); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->AddCycles_CDI(); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - { \ - if (cpu->Num==1) val &= ~0x1; \ - cpu->JumpTo(val); \ - } \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - } + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRB \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; cpu->DataRead8(offset, &val); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->AddCycles_CDI(); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! 
LDRB PC %08X\n", cpu->R[15]); \ + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); -// TODO: user mode #define A_LDRB_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; cpu->DataRead8(addr, &val); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->AddCycles_CDI(); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \ + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); @@ -224,104 +303,94 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRH \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite16(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->AddCycles_CD(); + if (cpu->CurInstr & (1<<21)) StoreSingle<16, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<16, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_STRH_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite16(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->AddCycles_CD(); + StoreSingle<16, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); // TODO: CHECK LDRD/STRD TIMINGS!! #define A_LDRD \ if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { r--; printf("!! 
MISALIGNED LDRD %d\n", r+1); } \ - cpu->DataRead32 (offset , &cpu->R[r ]); \ - cpu->DataRead32S(offset+4, &cpu->R[r+1]); \ - cpu->AddCycles_CDI(); + if (r&1) { A_UNK(cpu); return; } \ + bool dabort = !cpu->DataRead32(offset, &cpu->R[r]); \ + u32 val; dabort |= !cpu->DataRead32S(offset+4, &val); \ + if (dabort) { \ + cpu->AddCycles_CDI(); \ + ((ARMv5*)cpu)->DataAbort(); \ + return; } \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ + else cpu->R[r+1] = val; \ + cpu->AddCycles_CDI(); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRD_POST \ if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { r--; printf("!! MISALIGNED LDRD_POST %d\n", r+1); } \ - cpu->DataRead32 (addr , &cpu->R[r ]); \ - cpu->DataRead32S(addr+4, &cpu->R[r+1]); \ - cpu->AddCycles_CDI(); + if (r&1) { A_UNK(cpu); return; } \ + bool dabort = !cpu->DataRead32(addr, &cpu->R[r]); \ + u32 val; dabort |= !cpu->DataRead32S(addr+4, &val); \ + if (dabort) { \ + cpu->AddCycles_CDI(); \ + ((ARMv5*)cpu)->DataAbort(); \ + return; } \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ + else cpu->R[r+1] = val; \ + cpu->AddCycles_CDI(); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRD \ if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { r--; printf("!! 
MISALIGNED STRD %d\n", r+1); } \ - cpu->DataWrite32 (offset , cpu->R[r ]); \ - cpu->DataWrite32S(offset+4, cpu->R[r+1]); \ - cpu->AddCycles_CD(); + if (r&1) { A_UNK(cpu); return; } \ + bool dabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ + u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ + dabort |= !cpu->DataWrite32S (offset+4, storeval); /* no, i dont understand it either */ \ + cpu->AddCycles_CD(); \ + if (dabort) [[unlikely]] { \ + ((ARMv5*)cpu)->DataAbort(); \ + return; } \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_STRD_POST \ if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { r--; printf("!! MISALIGNED STRD_POST %d\n", r+1); } \ - cpu->DataWrite32 (addr , cpu->R[r ]); \ - cpu->DataWrite32S(addr+4, cpu->R[r+1]); \ - cpu->AddCycles_CD(); + if (r&1) { A_UNK(cpu); return; } \ + bool dabort = !cpu->DataWrite32(addr, cpu->R[r]); \ + u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ + dabort |= !cpu->DataWrite32S (addr+4, storeval); \ + cpu->AddCycles_CD(); \ + if (dabort) [[unlikely]] { \ + ((ARMv5*)cpu)->DataAbort(); \ + return; } \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->AddCycles_CDI(); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! 
LDRH PC %08X\n", cpu->R[15]); \ + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRH_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->AddCycles_CDI(); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \ + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRSB \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->DataRead8(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - cpu->AddCycles_CDI(); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \ + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRSB_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->DataRead8(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - cpu->AddCycles_CDI(); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! 
LDRSB PC %08X\n", cpu->R[15]); \ + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRSH \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - cpu->AddCycles_CDI(); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \ + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRSH_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - cpu->AddCycles_CDI(); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \ + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_IMPLEMENT_HD_LDRSTR(x) \ @@ -358,48 +427,122 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH) -void A_SWP(ARM* cpu) +template +inline void SWP(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 rm = cpu->R[cpu->CurInstr & 0xF]; + if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; - cpu->DataRead32(base, &val); - cpu->R[(cpu->CurInstr >> 12) & 0xF] = ROR(val, 8*(base&0x3)); + if ((byte ? cpu->DataRead8 (base, &val) + : cpu->DataRead32(base, &val))) [[likely]] + { + u32 numD = cpu->DataCycles; - u32 numD = cpu->DataCycles; - cpu->DataWrite32(base, rm); - cpu->DataCycles += numD; + if ((byte ? 
cpu->DataWrite8 (base, rm) + : cpu->DataWrite32(base, rm))) [[likely]] + { + // rd only gets updated if both read and write succeed + u32 rd = (cpu->CurInstr >> 12) & 0xF; + + if constexpr (!byte) val = ROR(val, 8*(base&0x3)); + + if (rd != 15) cpu->R[rd] = val; + else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't seem to work on the arm 9? + } + else ((ARMv5*)cpu)->DataAbort(); + + cpu->DataCycles += numD; + } + else ((ARMv5*)cpu)->DataAbort(); cpu->AddCycles_CDI(); } +void A_SWP(ARM* cpu) +{ + SWP(cpu); +} + void A_SWPB(ARM* cpu) { - u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; - u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF; - - cpu->DataRead8(base, &cpu->R[(cpu->CurInstr >> 12) & 0xF]); - - u32 numD = cpu->DataCycles; - cpu->DataWrite8(base, rm); - cpu->DataCycles += numD; - - cpu->AddCycles_CDI(); + SWP(cpu); } +void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags) +{ + enum // flags + { + load = (1<<0), + writeback = (1<<1), + decrement = (1<<2), + preinc = (1<<3), + restoreorthumb = (1<<4), // specifies restore cpsr for loads, thumb instr for stores + }; + if (cpu->Num == 1) + { + u32 base = cpu->R[baseid]; + bool flagpreinc = flags & preinc; + + if (flags & decrement) + { + flagpreinc = !flagpreinc; + base -= 0x40; + } + if (flagpreinc) base+=4; + + if (flags & load) + { + u32 pc; + cpu->DataRead32(base, &pc); + + cpu->AddCycles_CDI(); + cpu->JumpTo(pc, flags & restoreorthumb); + } + else + { + cpu->DataWrite32(base, cpu->R[15] + ((flags & restoreorthumb) ? 
2 : 4)); + + cpu->AddCycles_CD(); + } + } + else + { + cpu->AddCycles_C(); // checkme + } + + if (flags & writeback) + { + if (flags & decrement) cpu->R[baseid] -= 0x40; + else cpu->R[baseid] += 0x40; + } +} void A_LDM(ARM* cpu) { u32 baseid = (cpu->CurInstr >> 16) & 0xF; u32 base = cpu->R[baseid]; u32 wbbase; + u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; - - if (!(cpu->CurInstr & (1<<23))) + bool dabort = false; + + if (!(cpu->CurInstr & 0xFFFF)) [[unlikely]] { + EmptyRListLDMSTM(cpu, baseid, ((1 << 0) | // load + (((cpu->CurInstr >> 21) & 1) << 1) | // writeback + ((!(cpu->CurInstr & (1<<23))) << 2) | // decrement + ((preinc >> 24) << 3) | // preinc + (((cpu->CurInstr >> 22) & 1) << 4))); // restore + return; + } + + if (!(cpu->CurInstr & (1<<23))) // decrement + { + // decrement is actually an increment starting from the end address for (int i = 0; i < 16; i++) { if (cpu->CurInstr & (1<CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true); @@ -423,8 +567,13 @@ void A_LDM(ARM* cpu) if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i]); - else cpu->DataRead32S(base, &cpu->R[i]); + u32 val; + dabort |= !(first ? cpu->DataRead32 (base, &val) + : cpu->DataRead32S(base, &val)); + + // remaining loads still occur but are not written to a reg after a data abort is raised + if (!dabort) [[likely]] cpu->R[i] = val; + first = false; if (!preinc) base += 4; } @@ -434,15 +583,28 @@ void A_LDM(ARM* cpu) if (cpu->CurInstr & (1<<15)) { if (preinc) base += 4; - if (first) cpu->DataRead32 (base, &pc); - else cpu->DataRead32S(base, &pc); + dabort |= !(first ? 
cpu->DataRead32 (base, &pc) + : cpu->DataRead32S(base, &pc)); + if (!preinc) base += 4; - if (cpu->Num == 1) + if (cpu->Num == 1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc &= ~0x1; } - if (cpu->CurInstr & (1<<21)) + // handle data aborts + if (dabort) [[unlikely]] + { + if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) + cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + + cpu->AddCycles_CDI(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + + // writeback to base + if (cpu->CurInstr & (1<<21) && (baseid != 15)) { // post writeback if (cpu->CurInstr & (1<<23)) @@ -464,6 +626,7 @@ void A_LDM(ARM* cpu) if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + // jump if pc got written if (cpu->CurInstr & (1<<15)) cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); @@ -477,6 +640,17 @@ void A_STM(ARM* cpu) u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; + bool dabort = false; + + if (!(cpu->CurInstr & 0xFFFF)) [[unlikely]] + { + EmptyRListLDMSTM(cpu, baseid, ((0 << 0) | // load + (((cpu->CurInstr >> 21) & 1) << 1) | // writeback + ((!(cpu->CurInstr & (1<<23))) << 2) | // decrement + ((preinc >> 24) << 3) | // preinc + (0 << 4))); // thumb + return; + } if (!(cpu->CurInstr & (1<<23))) { @@ -486,7 +660,7 @@ void A_STM(ARM* cpu) base -= 4; } - if (cpu->CurInstr & (1<<21)) + if ((cpu->CurInstr & (1<<21)) && (baseid != 15)) cpu->R[baseid] = base; preinc = !preinc; @@ -510,15 +684,19 @@ void A_STM(ARM* cpu) { if (preinc) base += 4; + u32 val; if (i == baseid && !isbanked) { if ((cpu->Num == 0) || (!(cpu->CurInstr & ((1<DataWrite32(base, oldbase) : cpu->DataWrite32S(base, oldbase); - else - first ? cpu->DataWrite32(base, base) : cpu->DataWrite32S(base, base); // checkme + val = oldbase; + else val = base; } - else - first ? cpu->DataWrite32(base, cpu->R[i]) : cpu->DataWrite32S(base, cpu->R[i]); + else val = cpu->R[i]; + + if (i == 15) val+=4; + + dabort |= !(first ? 
cpu->DataWrite32 (base, val) + : cpu->DataWrite32S(base, val)); first = false; @@ -528,10 +706,21 @@ void A_STM(ARM* cpu) if (cpu->CurInstr & (1<<22)) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + + // handle data aborts + if (dabort) [[unlikely]] + { + // restore original value of base + cpu->R[baseid] = oldbase; + cpu->AddCycles_CD(); + ((ARMv5*)cpu)->DataAbort(); + return; + } - if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) + if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21)) && (baseid != 15)) cpu->R[baseid] = base; + cpu->AddCycles_CD(); } @@ -545,157 +734,98 @@ void A_STM(ARM* cpu) void T_LDR_PCREL(ARM* cpu) { u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); - cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); + bool dabort = !cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); + if (dabort) [[unlikely]] + { + ((ARMv5*)cpu)->DataAbort(); + } } void T_STR_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CD(); + StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_STRB_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CD(); + StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDR_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - - u32 val; - cpu->DataRead32(addr, &val); - cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDRB_REG(ARM* cpu) { - u32 addr = 
cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_STRH_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CD(); + StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDRSB_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); - cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDRH_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDRSH_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); - cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_STR_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD(); + StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); } void T_LDR_IMM(ARM* cpu) 
{ - u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - u32 val; - cpu->DataRead32(offset, &val); - cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); } void T_STRB_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD(); + StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F)); } void T_LDRB_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F)); } void T_STRH_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD(); + StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E)); } void T_LDRH_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E)); } void T_STR_SPREL(ARM* cpu) { - u32 offset = (cpu->CurInstr << 2) & 0x3FC; - offset += cpu->R[13]; - - cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7]); - cpu->AddCycles_CD(); + StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC)); } void T_LDR_SPREL(ARM* cpu) { - u32 offset = (cpu->CurInstr << 2) & 0x3FC; - offset += cpu->R[13]; - - 
cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); - cpu->AddCycles_CDI(); + LoadSingle(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC)); } @@ -703,6 +833,7 @@ void T_PUSH(ARM* cpu) { int nregs = 0; bool first = true; + bool dabort = false; for (int i = 0; i < 8; i++) { @@ -712,17 +843,24 @@ void T_PUSH(ARM* cpu) if (cpu->CurInstr & (1<<8)) nregs++; + + if (!nregs) [[unlikely]] + { + EmptyRListLDMSTM(cpu, 13, 0b11110); + return; + } u32 base = cpu->R[13]; base -= (nregs<<2); - cpu->R[13] = base; + u32 wbbase = base; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]); - else cpu->DataWrite32S(base, cpu->R[i]); + dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i])); + first = false; base += 4; } @@ -730,10 +868,19 @@ void T_PUSH(ARM* cpu) if (cpu->CurInstr & (1<<8)) { - if (first) cpu->DataWrite32 (base, cpu->R[14]); - else cpu->DataWrite32S(base, cpu->R[14]); + dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[14]) + : cpu->DataWrite32S(base, cpu->R[14])); } + if (dabort) [[unlikely]] + { + cpu->AddCycles_CD(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + + cpu->R[13] = wbbase; + cpu->AddCycles_CD(); } @@ -741,13 +888,24 @@ void T_POP(ARM* cpu) { u32 base = cpu->R[13]; bool first = true; + bool dabort = false; + + if (!(cpu->CurInstr & 0x1FF)) [[unlikely]] + { + EmptyRListLDMSTM(cpu, 13, 0b00011); + return; + } for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i]); - else cpu->DataRead32S(base, &cpu->R[i]); + u32 val; + dabort |= !(first ? cpu->DataRead32 (base, &val) + : cpu->DataRead32S(base, &val)); + + if (!dabort) [[likely]] cpu->R[i] = val; + first = false; base += 4; } @@ -756,14 +914,25 @@ void T_POP(ARM* cpu) if (cpu->CurInstr & (1<<8)) { u32 pc; - if (first) cpu->DataRead32 (base, &pc); - else cpu->DataRead32S(base, &pc); - if (cpu->Num==1) pc |= 0x1; + dabort |= !(first ? 
cpu->DataRead32 (base, &pc) + : cpu->DataRead32S(base, &pc)); + + if (dabort) [[unlikely]] goto dataabort; + if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1; cpu->JumpTo(pc); base += 4; } + if (dabort) [[unlikely]] + { + dataabort: + cpu->AddCycles_CDI(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + cpu->R[13] = base; + cpu->AddCycles_CDI(); } @@ -771,18 +940,33 @@ void T_STMIA(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; + bool dabort = false; + + if (!(cpu->CurInstr & 0xFF)) [[unlikely]] + { + EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b10010); + return; + } for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]); - else cpu->DataWrite32S(base, cpu->R[i]); + dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i])); + first = false; base += 4; } } + if (dabort) [[unlikely]] + { + cpu->AddCycles_CD(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + // TODO: check "Rb included in Rlist" case cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; cpu->AddCycles_CD(); @@ -792,18 +976,35 @@ void T_LDMIA(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; + bool dabort = false; + + if (!(cpu->CurInstr & 0xFF)) [[unlikely]] + { + EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b00011); + return; + } for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i]); - else cpu->DataRead32S(base, &cpu->R[i]); + u32 val; + dabort |= !(first ? 
cpu->DataRead32 (base, &val) + : cpu->DataRead32S(base, &val)); + + if (!dabort) [[likely]] cpu->R[i] = val; first = false; base += 4; } } + if (dabort) [[unlikely]] + { + cpu->AddCycles_CDI(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; diff --git a/src/ARMInterpreter_MultiplySuperLLE.h b/src/ARMInterpreter_MultiplySuperLLE.h new file mode 100644 index 00000000..21b17bbc --- /dev/null +++ b/src/ARMInterpreter_MultiplySuperLLE.h @@ -0,0 +1,136 @@ +#ifndef ARMINTERPRETER_MULTIPLYSUPERLLE_H +#define ARMINTERPRETER_MULTIPLYSUPERLLE_H + +#include "types.h" + +using namespace melonDS; + +/* + Copyright (c) 2024 zaydlang + + This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. + If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + + + + +// code taken from: (also features a few alternative implementations that could maybe be worth looking at?) +// https://github.com/calc84maniac/multiplication-algorithm/blob/master/impl_opt.h +// based on research that can be found here: https://bmchtech.github.io/post/multiply/ + +// the code in this file is dedicated to handling the calculation of the carry flag for multiplication (S variant) instructions on the ARM7TDMI. 
+ + +// Takes a multiplier between -0x01000000 and 0x00FFFFFF, cycles between 0 and 2 +static inline bool booths_multiplication32_opt(u32 multiplicand, u32 multiplier, u32 accumulator) { + // Set the low bit of the multiplicand to cause negation to invert the upper bits, this bit can't propagate to bit 31 + multiplicand |= 1; + + // Optimized first iteration + u32 booth = (s32)(multiplier << 31) >> 31; + u32 carry = booth * multiplicand; + // Pre-populate accumulator for output + u32 output = accumulator; + + u32 sum = output + carry; + int shift = 29; + do { + for (int i = 0; i < 4; i++, shift -= 2) { + // Get next booth factor (-2 to 2, shifted left by 30-shift) + u32 next_booth = (s32)(multiplier << shift) >> shift; + u32 factor = next_booth - booth; + booth = next_booth; + // Get scaled value of booth addend + u32 addend = multiplicand * factor; + // Combine the addend with the CSA + // Not performing any masking seems to work because the lower carries can't propagate to bit 31 + output ^= carry ^ addend; + sum += addend; + carry = sum - output; + } + } while (booth != multiplier); + + return carry >> 31; +} + +// Takes a multiplicand shifted right by 6 and a multiplier shifted right by 26 (zero or sign extended) +static inline bool booths_multiplication64_opt(u32 multiplicand, u32 multiplier, u32 accum_hi) { + // Skipping the first 14 iterations seems to work because the lower carries can't propagate to bit 63 + // This means only magic bits 62-61 are needed (which requires decoding 3 booth chunks), + // and only the last two booth iterations are needed + + // Set the low bit of the multiplicand to cause negation to invert the upper bits + multiplicand |= 1; + + // Pre-populate magic bit 61 for carry + u32 carry = ~accum_hi & UINT32_C(0x20000000); + // Pre-populate magic bits 63-60 for output (with carry magic pre-added in) + u32 output = accum_hi - UINT32_C(0x08000000); + + // Get factors from the top 3 booth chunks + u32 booth0 = (s32)(multiplier << 27) >> 
27; + u32 booth1 = (s32)(multiplier << 29) >> 29; + u32 booth2 = (s32)(multiplier << 31) >> 31; + u32 factor0 = multiplier - booth0; + u32 factor1 = booth0 - booth1; + u32 factor2 = booth1 - booth2; + + // Get scaled value of the 3rd top booth addend + u32 addend = multiplicand * factor2; + // Finalize bits 61-60 of output magic using its sign + output -= addend & UINT32_C(0x10000000); + // Get scaled value of the 2nd top booth addend + addend = multiplicand * factor1; + // Finalize bits 63-62 of output magic using its sign + output -= addend & UINT32_C(0x40000000); + + // Get the carry from the CSA in bit 61 and propagate it to bit 62, which is not processed in this iteration + u32 sum = output + (addend & UINT32_C(0x20000000)); + // Subtract out the carry magic to get the actual output magic + output -= carry; + + // Get scaled value of the 1st top booth addend + addend = multiplicand * factor0; + // Add to bit 62 and propagate the carry + sum += addend & UINT32_C(0x40000000); + + // Cancel out the output magic bit 63 to get the carry bit 63 + return (sum ^ output) >> 31; +} + + +// also for MLAS and MUL (thumb ver.) 
+inline bool MULSCarry(s32 rm, s32 rs, u32 rn, bool lastcycle) +{ + if (lastcycle) + return (rs >> 30) == -2; + else + return booths_multiplication32_opt(rm, rs, rn); +} + +// also for UMLALS +inline bool UMULLSCarry(u64 rd, u32 rm, u32 rs, bool lastcycle) +{ + if (lastcycle) + return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32); + else + return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF); +} + +// also for SMLALS +inline bool SMULLSCarry(u64 rd, s32 rm, s32 rs, bool lastcycle) +{ + if (lastcycle) + return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32); + else + return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF); +} + +#endif diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 1ebcce8e..8bf509e9 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -588,7 +588,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept u32 numWriteAddrs = 0, writeAddrsTranslated = 0; cpu->FillPipeline(); - u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]}; + u32 nextInstr[2] = {(u32)cpu->NextInstr[0], (u32)cpu->NextInstr[1]}; u32 nextInstrAddr[2] = {blockAddr, r15}; JIT_DEBUGPRINT("start block %x %08x (%x)\n", blockAddr, cpu->CPSR, localAddr); diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp index 58838307..d1be9761 100644 --- a/src/ARM_InstrInfo.cpp +++ b/src/ARM_InstrInfo.cpp @@ -194,6 +194,7 @@ const u32 A_BX = A_BranchAlways | A_Read0 | ak(ak_BX); const u32 A_BLX_REG = A_BranchAlways | A_Link | A_Read0 | ak(ak_BLX_REG); const u32 A_UNK = A_BranchAlways | A_Link | ak(ak_UNK); +const u32 A_BKPT = A_BranchAlways | A_Link | ak(ak_UNK); const u32 A_MSR_IMM = ak(ak_MSR_IMM); const u32 A_MSR_REG = A_Read0 | ak(ak_MSR_REG); const u32 A_MRS = A_Write12 | ak(ak_MRS); diff --git a/src/ARM_InstrTable.h b/src/ARM_InstrTable.h index 8213c2e0..2c480f8d 100644 --- a/src/ARM_InstrTable.h +++ b/src/ARM_InstrTable.h @@ -130,7 +130,7 @@ INSTRFUNC_PROTO(ARMInstrTable[4096]) = // 0001 0010 0000 A_MSR_REG, A_BX, A_UNK, A_BLX_REG, - A_UNK, A_QSUB, A_UNK, 
A_UNK, + A_UNK, A_QSUB, A_UNK, A_BKPT, A_SMLAWy, A_UNK, A_SMULWy, A_STRH_REG, A_SMLAWy, A_LDRD_REG, A_SMULWy, A_STRD_REG, diff --git a/src/CP15.cpp b/src/CP15.cpp index c271e180..fba73bda 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -266,8 +266,6 @@ void ARMv5::UpdatePURegions(bool update_all) // PU disabled u8 mask = 0x07; - if (CP15Control & (1<<2)) mask |= 0x30; - if (CP15Control & (1<<12)) mask |= 0x40; memset(PU_UserMap, mask, 0x100000); memset(PU_PrivMap, mask, 0x100000); @@ -579,7 +577,7 @@ void ARMv5::CP15Write(u32 id, u32 val) case 0x670: case 0x671: char log_output[1024]; - PU_Region[(id >> 4) & 0xF] = val; + PU_Region[(id >> 4) & 0xF] = val & ~(0x3F<<6); std::snprintf(log_output, sizeof(log_output), @@ -773,16 +771,15 @@ u32 ARMv5::CP15Read(u32 id) const // TCM are handled here. // TODO: later on, handle PU, and maybe caches -u32 ARMv5::CodeRead32(u32 addr, bool branch) +u64 ARMv5::CodeRead32(u32 addr, bool branch) { - /*if (branch || (!(addr & 0xFFF))) + // prefetch abort + // the actual exception is not raised until the aborted instruction is executed + if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]] { - if (!(PU_Map[addr>>12] & 0x04)) - { - PrefetchAbort(); - return 0; - } - }*/ + CodeCycles = 1; + return ((u64)1<<63); + } if (addr < ITCMSize) { @@ -807,150 +804,163 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) } -void ARMv5::DataRead8(u32 addr, u32* val) +bool ARMv5::DataRead8(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { - DataAbort(); - return; + DataCycles = 1; + return false; } - DataRegion = addr; - if (addr < ITCMSize) { DataCycles = 1; *val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *val = *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)]; - return; + return true; } - + *val = BusRead8(addr); DataCycles = MemTimings[addr >> 
12][1]; + return true; } -void ARMv5::DataRead16(u32 addr, u32* val) +bool ARMv5::DataRead16(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { - DataAbort(); - return; + DataCycles = 1; + return false; } - DataRegion = addr; - addr &= ~1; if (addr < ITCMSize) { DataCycles = 1; *val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *val = *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)]; - return; + return true; } - + *val = BusRead16(addr); DataCycles = MemTimings[addr >> 12][1]; + return true; } -void ARMv5::DataRead32(u32 addr, u32* val) +bool ARMv5::DataRead32(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { - DataAbort(); - return; + DataCycles = 1; + return false; } - DataRegion = addr; - addr &= ~3; if (addr < ITCMSize) { DataCycles = 1; *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)]; - return; + return true; } *val = BusRead32(addr); DataCycles = MemTimings[addr >> 12][2]; + return true; } -void ARMv5::DataRead32S(u32 addr, u32* val) +bool ARMv5::DataRead32S(u32 addr, u32* val) { + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] + { + DataCycles += 1; + return false; + } + addr &= ~3; if (addr < ITCMSize) { DataCycles += 1; *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles += 1; *val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)]; - return; + return true; } *val = BusRead32(addr); DataCycles += MemTimings[addr >> 12][3]; + 
return true; } -void ARMv5::DataWrite8(u32 addr, u8 val) +bool ARMv5::DataWrite8(u32 addr, u8 val) { - if (!(PU_Map[addr>>12] & 0x02)) + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { - DataAbort(); - return; + DataCycles = 1; + return false; } - DataRegion = addr; - if (addr < ITCMSize) { DataCycles = 1; *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return; + return true; } BusWrite8(addr, val); DataCycles = MemTimings[addr >> 12][1]; + return true; } -void ARMv5::DataWrite16(u32 addr, u16 val) +bool ARMv5::DataWrite16(u32 addr, u16 val) { - if (!(PU_Map[addr>>12] & 0x02)) + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { - DataAbort(); - return; + DataCycles = 1; + return false; } - DataRegion = addr; - addr &= ~1; if (addr < ITCMSize) @@ -958,29 +968,30 @@ void ARMv5::DataWrite16(u32 addr, u16 val) DataCycles = 1; *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return; + return true; } BusWrite16(addr, val); DataCycles = MemTimings[addr >> 12][1]; + return true; } -void ARMv5::DataWrite32(u32 addr, u32 val) +bool ARMv5::DataWrite32(u32 addr, u32 val) { - if (!(PU_Map[addr>>12] & 0x02)) + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { - DataAbort(); - return; + DataCycles = 1; + return false; } - DataRegion = addr; - addr &= ~3; if (addr < ITCMSize) @@ -988,21 +999,30 @@ void ARMv5::DataWrite32(u32 addr, u32 val) 
DataCycles = 1; *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return; + return true; } BusWrite32(addr, val); DataCycles = MemTimings[addr >> 12][2]; + return true; } -void ARMv5::DataWrite32S(u32 addr, u32 val) +bool ARMv5::DataWrite32S(u32 addr, u32 val) { + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] + { + DataCycles += 1; + return false; + } + addr &= ~3; if (addr < ITCMSize) @@ -1012,17 +1032,18 @@ void ARMv5::DataWrite32S(u32 addr, u32 val) #ifdef JIT_ENABLED NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); #endif - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles += 1; *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return; + return true; } BusWrite32(addr, val); DataCycles += MemTimings[addr >> 12][3]; + return true; } void ARMv5::GetCodeMemRegion(u32 addr, MemRegion* region) diff --git a/src/DSi.cpp b/src/DSi.cpp index 00ed8da0..0a0bd36d 100644 --- a/src/DSi.cpp +++ b/src/DSi.cpp @@ -162,6 +162,7 @@ void DSi::Reset() SCFG_Clock9 = 0x0187; // CHECKME SCFG_Clock7 = 0x0187; SCFG_EXT[0] = 0x8307F100; + SetVRAMTimings(true); SCFG_EXT[1] = 0x93FFFB06; SCFG_MC = 0x0010 | (~((u32)(NDSCartSlot.GetCart() != nullptr))&1);//0x0011; SCFG_RST = 0; @@ -235,6 +236,7 @@ void DSi::DoSavestateExtra(Savestate* file) Set_SCFG_Clock9(SCFG_Clock9); Set_SCFG_MC(SCFG_MC); DSP.SetRstLine(SCFG_RST & 0x0001); + SetVRAMTimings(SCFG_EXT[0] & (1<<13)); MBK[0][8] = 0; MBK[1][8] = 0; @@ -713,6 +715,7 @@ void DSi::SoftReset() SCFG_Clock9 = 0x0187; // CHECKME SCFG_Clock7 = 0x0187; SCFG_EXT[0] = 0x8307F100; + SetVRAMTimings(true); SCFG_EXT[1] = 0x93FFFB06; SCFG_MC = 0x0010;//0x0011; // TODO: is this actually reset? 
@@ -1303,6 +1306,14 @@ void DSi::Set_SCFG_MC(u32 val) } } +void DSi::SetVRAMTimings(bool extrabuswidth) +{ + if (extrabuswidth) + SetARM9RegionTimings(0x06000, 0x07000, Mem9_VRAM, 32, 1, 1); // dsi vram + else + SetARM9RegionTimings(0x06000, 0x07000, Mem9_VRAM, 16, 1, 1); // ds vram +} + u8 DSi::ARM9Read8(u32 addr) { @@ -2541,11 +2552,18 @@ void DSi::ARM9IOWrite32(u32 addr, u32 val) u32 oldram = (SCFG_EXT[0] >> 14) & 0x3; u32 newram = (val >> 14) & 0x3; + u32 oldvram = (SCFG_EXT[0] & (1<<13)); + u32 newvram = (val & (1<<13)); + SCFG_EXT[0] &= ~0x8007F19F; SCFG_EXT[0] |= (val & 0x8007F19F); SCFG_EXT[1] &= ~0x0000F080; SCFG_EXT[1] |= (val & 0x0000F080); Log(LogLevel::Debug, "SCFG_EXT = %08X / %08X (val9 %08X)\n", SCFG_EXT[0], SCFG_EXT[1], val); + + if (oldvram != newvram) + SetVRAMTimings(newvram); + /*switch ((SCFG_EXT[0] >> 14) & 0x3) { case 0: diff --git a/src/DSi.h b/src/DSi.h index 23a2460c..7b7a94a2 100644 --- a/src/DSi.h +++ b/src/DSi.h @@ -96,6 +96,7 @@ public: void MapNWRAM_B(u32 num, u8 val); void MapNWRAM_C(u32 num, u8 val); void MapNWRAMRange(u32 cpu, u32 num, u32 val); + void SetVRAMTimings(bool extrabuswidth); u8 ARM9Read8(u32 addr) override; u16 ARM9Read16(u32 addr) override;