From 065573f316c1c2003837d81e645f6050e0fbd006 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 31 May 2024 18:09:45 -0400 Subject: [PATCH 001/115] fix writebacks overwriting registers swapped with spsr fixes gbarunner3 --- src/ARMInterpreter_LoadStore.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 91acaacc..e21d7757 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -429,10 +429,10 @@ void A_LDM(ARM* cpu) if (!preinc) base += 4; } } - + + u32 pc; if (cpu->CurInstr & (1<<15)) { - u32 pc; if (preinc) base += 4; if (first) cpu->DataRead32 (base, &pc); else cpu->DataRead32S(base, &pc); @@ -440,8 +440,6 @@ void A_LDM(ARM* cpu) if (cpu->Num == 1) pc &= ~0x1; - - cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); } if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) @@ -466,6 +464,9 @@ void A_LDM(ARM* cpu) cpu->R[baseid] = wbbase; } + if (cpu->CurInstr & (1<<15)) + cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); + cpu->AddCycles_CDI(); } From 960f063eaa8c298600198916f91811d34114e249 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 2 Jun 2024 00:11:01 -0400 Subject: [PATCH 002/115] improve data aborts for ldm --- src/ARMInterpreter_LoadStore.cpp | 73 +++++++++++++++++++++++--------- 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index e21d7757..dfdb98c2 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -397,6 +397,7 @@ void A_LDM(ARM* cpu) u32 wbbase; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; + int abortreg = 16; if (!(cpu->CurInstr & (1<<23))) { @@ -415,10 +416,32 @@ void A_LDM(ARM* cpu) preinc = !preinc; } + // check for data aborts + if (cpu->Num == 0) + { + u32 tmpbase = base; + for (int i = 0; i < 16; i++) + { + if (cpu->CurInstr & (1<PU_Map[tmpbase>>12] & 0x01) + { + if (!preinc) tmpbase += 4; + } + else + { + abortreg = i; + break; + } + } + } + } + if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true); - for (int i = 0; i < 15; i++) + for (int i = 0; i < std::min(15, abortreg); i++) { if (cpu->CurInstr & (1<CurInstr & (1<<15)) + if ((cpu->CurInstr & (1<<15)) && (abortreg == 16)) { if (preinc) base += 4; if (first) cpu->DataRead32 (base, &pc); @@ -445,27 +468,35 @@ void A_LDM(ARM* cpu) if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); - if (cpu->CurInstr & (1<<21)) + // if it's 16 then there was no data abort + if (abortreg == 16) { - // post writeback - if (cpu->CurInstr & (1<<23)) - wbbase = base; - - if (cpu->CurInstr & (1 << baseid)) + if (cpu->CurInstr & (1<<21)) { - if (cpu->Num == 0) - { - u32 rlist = cpu->CurInstr & 0xFFFF; - if ((!(rlist & ~(1 << baseid))) || (rlist & ~((2 << baseid) - 1))) - cpu->R[baseid] = wbbase; - } - } - else - cpu->R[baseid] = wbbase; - } + // post writeback + if (cpu->CurInstr & (1<<23)) + wbbase = base; - if (cpu->CurInstr & (1<<15)) - cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); + if (cpu->CurInstr & (1 << baseid)) + { + if (cpu->Num == 0) + { + u32 rlist = cpu->CurInstr & 0xFFFF; + if ((!(rlist & ~(1 << baseid))) || (rlist & ~((2 << baseid) - 1))) + cpu->R[baseid] = wbbase; + } + } + else + cpu->R[baseid] = wbbase; + } + + if (cpu->CurInstr & (1<<15)) + cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); + } + else + { + ((ARMv5*)cpu)->DataAbort(); + } cpu->AddCycles_CDI(); } From 63d4b787334f2d2f41e220c88044e933a24b0266 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 2 Jun 2024 10:13:50 -0400 Subject: [PATCH 003/115] improve implementation --- src/ARM.cpp | 6 ++-- src/ARM.h | 12 +++---- src/ARMInterpreter_LoadStore.cpp | 56 +++++++++++--------------------- src/CP15.cpp | 22 +++++++++---- 4 files changed, 44 insertions(+), 52 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index c2f6a6c2..c96cb65d 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1168,21 +1168,23 @@ void ARMv4::DataRead16(u32 addr, u32* val) DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; } -void ARMv4::DataRead32(u32 addr, u32* val) +bool ARMv4::DataRead32(u32 addr, u32* val) { addr &= ~3; *val = BusRead32(addr); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][2]; + return true; } -void ARMv4::DataRead32S(u32 addr, u32* val) +bool ARMv4::DataRead32S(u32 addr, u32* val) { addr &= ~3; *val = BusRead32(addr); DataCycles += NDS.ARM7MemTimings[addr >> 15][3]; + return true; } void ARMv4::DataWrite8(u32 addr, u8 val) diff --git a/src/ARM.h b/src/ARM.h index 1e0b71b8..56a6306e 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -130,8 +130,8 @@ public: virtual void DataRead8(u32 addr, u32* val) = 0; virtual void DataRead16(u32 addr, u32* val) = 0; - virtual void DataRead32(u32 addr, u32* val) = 0; - virtual void DataRead32S(u32 addr, u32* val) = 0; + virtual bool DataRead32(u32 addr, u32* val) = 0; + virtual bool DataRead32S(u32 addr, u32* val) = 0; virtual void DataWrite8(u32 addr, u8 val) = 0; virtual void DataWrite16(u32 addr, u16 val) = 0; virtual void DataWrite32(u32 addr, u32 val) = 0; @@ -251,8 +251,8 @@ public: void DataRead8(u32 addr, u32* val) override; void DataRead16(u32 addr, u32* val) override; - void DataRead32(u32 addr, u32* val) override; - void DataRead32S(u32 addr, u32* val) override; + bool DataRead32(u32 addr, u32* val) override; + bool DataRead32S(u32 addr, u32* val) override; void DataWrite8(u32 addr, u8 val) override; void DataWrite16(u32 addr, u16 val) override; void DataWrite32(u32 addr, u32 val) override; @@ -400,8 +400,8 @@ public: void DataRead8(u32 addr, u32* val) override; void DataRead16(u32 addr, u32* val) override; - void DataRead32(u32 addr, u32* val) override; - void DataRead32S(u32 addr, u32* val) override; + bool DataRead32(u32 addr, u32* val) override; + bool DataRead32S(u32 addr, u32* val) override; void DataWrite8(u32 addr, u8 val) override; void DataWrite16(u32 addr, u16 val) override; void DataWrite32(u32 addr, u32 val) override; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index dfdb98c2..806b4c3e 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -395,12 +395,14 @@ void A_LDM(ARM* cpu) u32 baseid = (cpu->CurInstr >> 16) & 0xF; u32 base = cpu->R[baseid]; u32 wbbase; + u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; - int abortreg = 16; + bool dataabort = false; - if (!(cpu->CurInstr & (1<<23))) + if (!(cpu->CurInstr & (1<<23))) // decrement { + // decrement is actually an increment starting from the end address for (int i = 0; i < 16; i++) { if (cpu->CurInstr & (1<Num == 0) - { - u32 tmpbase = base; - for (int i = 0; i < 16; i++) - { - if (cpu->CurInstr & (1<PU_Map[tmpbase>>12] & 0x01) - { - if (!preinc) tmpbase += 4; - } - else - { - abortreg = i; - break; - } - } - } - } - + // switch to user mode regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true); - for (int i = 0; i < std::min(15, abortreg); i++) + for (int i = 0; i < 15; i++) { if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i]); - else cpu->DataRead32S(base, &cpu->R[i]); + if (first) {if (!cpu->DataRead32 (base, &cpu->R[i])) {dataabort = true; goto abortjump;}} + else if (!cpu->DataRead32S(base, &cpu->R[i])) {dataabort = true; goto abortjump;} first = false; if (!preinc) base += 4; } } u32 pc; - if ((cpu->CurInstr & (1<<15)) && (abortreg == 16)) + if ((cpu->CurInstr & (1<<15))) { if (preinc) base += 4; - if (first) cpu->DataRead32 (base, &pc); - else cpu->DataRead32S(base, &pc); + if (first) {if (!cpu->DataRead32 (base, &pc)) dataabort = true;} + else if (!cpu->DataRead32S(base, &pc)) dataabort = true; if (!preinc) base += 4; if (cpu->Num == 1) pc &= ~0x1; } + abortjump: + + // switch back to previous regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); - // if it's 16 then there was no data abort - if (abortreg == 16) + if (!dataabort) { + // writeback to base if (cpu->CurInstr & (1<<21)) { // post writeback @@ -489,14 +473,12 @@ void A_LDM(ARM* cpu) else cpu->R[baseid] = wbbase; } - + + // jump if pc got written if (cpu->CurInstr & (1<<15)) cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); } - else - { - ((ARMv5*)cpu)->DataAbort(); - } + else cpu->R[baseid] = oldbase; // restore original value of base in case the reg got written to cpu->AddCycles_CDI(); } diff --git a/src/CP15.cpp b/src/CP15.cpp index 5e5b35ea..fa55853d 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -863,12 +863,12 @@ void ARMv5::DataRead16(u32 addr, u32* val) DataCycles = MemTimings[addr >> 12][1]; } -void ARMv5::DataRead32(u32 addr, u32* val) +bool ARMv5::DataRead32(u32 addr, u32* val) { if (!(PU_Map[addr>>12] & 0x01)) { DataAbort(); - return; + return false; } DataRegion = addr; @@ -879,38 +879,46 @@ void ARMv5::DataRead32(u32 addr, u32* val) { DataCycles = 1; *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)]; - return; + return true; } *val = BusRead32(addr); DataCycles = MemTimings[addr >> 12][2]; + return true; } -void ARMv5::DataRead32S(u32 addr, u32* val) +bool ARMv5::DataRead32S(u32 addr, u32* val) { + if (!(PU_Map[addr>>12] & 0x01)) + { + DataAbort(); + return false; + } + addr &= ~3; if (addr < ITCMSize) { DataCycles += 1; *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles += 1; *val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)]; - return; + return true; } *val = BusRead32(addr); DataCycles += MemTimings[addr >> 12][3]; + return true; } void ARMv5::DataWrite8(u32 addr, u8 val) From b5c1ee33fbaf4ff428c3b5f2b2e5d71c37a70041 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 2 Jun 2024 10:33:29 -0400 Subject: [PATCH 004/115] implement stm --- src/ARM.cpp | 6 ++++-- src/ARM.h | 12 ++++++------ src/ARMInterpreter_LoadStore.cpp | 9 +++++---- src/CP15.cpp | 22 +++++++++++++++------- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index c96cb65d..acf1b6e4 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1203,21 +1203,23 @@ void ARMv4::DataWrite16(u32 addr, u16 val) DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; } -void ARMv4::DataWrite32(u32 addr, u32 val) +bool ARMv4::DataWrite32(u32 addr, u32 val) { addr &= ~3; BusWrite32(addr, val); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][2]; + return true; } -void ARMv4::DataWrite32S(u32 addr, u32 val) +bool ARMv4::DataWrite32S(u32 addr, u32 val) { addr &= ~3; BusWrite32(addr, val); DataCycles += NDS.ARM7MemTimings[addr >> 15][3]; + return true; } diff --git a/src/ARM.h b/src/ARM.h index 56a6306e..67087433 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -134,8 +134,8 @@ public: virtual bool DataRead32S(u32 addr, u32* val) = 0; virtual void DataWrite8(u32 addr, u8 val) = 0; virtual void DataWrite16(u32 addr, u16 val) = 0; - virtual void DataWrite32(u32 addr, u32 val) = 0; - virtual void DataWrite32S(u32 addr, u32 val) = 0; + virtual bool DataWrite32(u32 addr, u32 val) = 0; + virtual bool DataWrite32S(u32 addr, u32 val) = 0; virtual void AddCycles_C() = 0; virtual void AddCycles_CI(s32 numI) = 0; @@ -255,8 +255,8 @@ public: bool DataRead32S(u32 addr, u32* val) override; void DataWrite8(u32 addr, u8 val) override; void DataWrite16(u32 addr, u16 val) override; - void DataWrite32(u32 addr, u32 val) override; - void DataWrite32S(u32 addr, u32 val) override; + bool DataWrite32(u32 addr, u32 val) override; + bool DataWrite32S(u32 addr, u32 val) override; void AddCycles_C() override { @@ -404,8 +404,8 @@ public: bool DataRead32S(u32 addr, u32* val) override; void DataWrite8(u32 addr, u8 val) override; void DataWrite16(u32 addr, u16 val) override; - void DataWrite32(u32 addr, u32 val) override; - void DataWrite32S(u32 addr, u32 val) override; + bool DataWrite32(u32 addr, u32 val) override; + bool DataWrite32S(u32 addr, u32 val) override; void AddCycles_C() override; void AddCycles_CI(s32 num) override; void AddCycles_CDI() override; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 806b4c3e..5c6b4c42 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -490,6 +490,7 @@ void A_STM(ARM* cpu) u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; + bool dataabort = false; if (!(cpu->CurInstr & (1<<23))) { @@ -526,12 +527,12 @@ void A_STM(ARM* cpu) if (i == baseid && !isbanked) { if ((cpu->Num == 0) || (!(cpu->CurInstr & ((1<DataWrite32(base, oldbase) : cpu->DataWrite32S(base, oldbase); + {if (!(first ? cpu->DataWrite32(base, oldbase) : cpu->DataWrite32S(base, oldbase))) {dataabort = true; break;}} else - first ? cpu->DataWrite32(base, base) : cpu->DataWrite32S(base, base); // checkme + if (!(first ? cpu->DataWrite32(base, base) : cpu->DataWrite32S(base, base))) {dataabort = true; break;} // checkme } else - first ? cpu->DataWrite32(base, cpu->R[i]) : cpu->DataWrite32S(base, cpu->R[i]); + if (!(first ? cpu->DataWrite32(base, cpu->R[i]) : cpu->DataWrite32S(base, cpu->R[i]))) {dataabort = true; break;} first = false; @@ -542,7 +543,7 @@ void A_STM(ARM* cpu) if (cpu->CurInstr & (1<<22)) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); - if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) + if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21)) && !dataabort) cpu->R[baseid] = base; cpu->AddCycles_CD(); diff --git a/src/CP15.cpp b/src/CP15.cpp index fa55853d..b2ab9f91 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -979,12 +979,12 @@ void ARMv5::DataWrite16(u32 addr, u16 val) DataCycles = MemTimings[addr >> 12][1]; } -void ARMv5::DataWrite32(u32 addr, u32 val) +bool ARMv5::DataWrite32(u32 addr, u32 val) { if (!(PU_Map[addr>>12] & 0x02)) { DataAbort(); - return; + return false; } DataRegion = addr; @@ -996,21 +996,28 @@ void ARMv5::DataWrite32(u32 addr, u32 val) DataCycles = 1; *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return; + return true; } BusWrite32(addr, val); DataCycles = MemTimings[addr >> 12][2]; + return true; } -void ARMv5::DataWrite32S(u32 addr, u32 val) +bool ARMv5::DataWrite32S(u32 addr, u32 val) { + if (!(PU_Map[addr>>12] & 0x02)) + { + DataAbort(); + return false; + } + addr &= ~3; if (addr < ITCMSize) @@ -1020,17 +1027,18 @@ void ARMv5::DataWrite32S(u32 addr, u32 val) #ifdef JIT_ENABLED NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); #endif - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles += 1; *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return; + return true; } BusWrite32(addr, val); DataCycles += MemTimings[addr >> 12][3]; + return true; } void ARMv5::GetCodeMemRegion(u32 addr, MemRegion* region) From 5e760a15361bb20b1d2a659caa74242e8a157344 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 2 Jun 2024 19:34:29 -0400 Subject: [PATCH 005/115] slightly cleaner code --- src/ARMInterpreter_LoadStore.cpp | 33 +++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 5c6b4c42..afcca05d 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -427,8 +427,13 @@ void A_LDM(ARM* cpu) if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i])) {dataabort = true; goto abortjump;}} - else if (!cpu->DataRead32S(base, &cpu->R[i])) {dataabort = true; goto abortjump;} + if (!(first ? cpu->DataRead32 (base, &cpu->R[i]) + : cpu->DataRead32S(base, &cpu->R[i]))) + { + dataabort = true; + goto abortjump; + } + first = false; if (!preinc) base += 4; } @@ -438,8 +443,12 @@ void A_LDM(ARM* cpu) if ((cpu->CurInstr & (1<<15))) { if (preinc) base += 4; - if (first) {if (!cpu->DataRead32 (base, &pc)) dataabort = true;} - else if (!cpu->DataRead32S(base, &pc)) dataabort = true; + if (!(first ? cpu->DataRead32 (base, &pc) + : cpu->DataRead32S(base, &pc))) + { + dataabort = true; + } + if (!preinc) base += 4; if (cpu->Num == 1) @@ -524,15 +533,21 @@ void A_STM(ARM* cpu) { if (preinc) base += 4; + u32 val; if (i == baseid && !isbanked) { if ((cpu->Num == 0) || (!(cpu->CurInstr & ((1<DataWrite32(base, oldbase) : cpu->DataWrite32S(base, oldbase))) {dataabort = true; break;}} - else - if (!(first ? cpu->DataWrite32(base, base) : cpu->DataWrite32S(base, base))) {dataabort = true; break;} // checkme + val = oldbase; + else val = base; + } + else val = cpu->R[i]; + + if (!(first ? cpu->DataWrite32 (base, val) + : cpu->DataWrite32S(base, val))) + { + dataabort = true; + break; } - else - if (!(first ? cpu->DataWrite32(base, cpu->R[i]) : cpu->DataWrite32S(base, cpu->R[i]))) {dataabort = true; break;} first = false; From c2a57b79a03adaead1044fccb67988ddfe0d5b67 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 2 Jun 2024 22:41:01 -0400 Subject: [PATCH 006/115] fix stmd(a/b) writeback --- src/ARMInterpreter_LoadStore.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index afcca05d..96766288 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -558,8 +558,12 @@ void A_STM(ARM* cpu) if (cpu->CurInstr & (1<<22)) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); - if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21)) && !dataabort) - cpu->R[baseid] = base; + if (!dataabort) + { + if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) + cpu->R[baseid] = base; + } + else cpu->R[baseid] = oldbase; cpu->AddCycles_CD(); } From 1e8194e367517b6c08b0bc4ae38971843973c656 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 4 Jun 2024 19:06:54 -0400 Subject: [PATCH 007/115] fix ldr and str --- src/ARMInterpreter_LoadStore.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 96766288..fe9bfd0c 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -65,9 +65,10 @@ namespace melonDS::ARMInterpreter u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ - cpu->DataWrite32(offset, storeval); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->AddCycles_CD(); + bool dataabort = !cpu->DataWrite32(offset, storeval); \ + cpu->AddCycles_CD(); \ + if (dataabort) return; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; // TODO: user mode (bit21) #define A_STR_POST \ @@ -75,9 +76,10 @@ namespace melonDS::ARMInterpreter u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ - cpu->DataWrite32(addr, storeval); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->AddCycles_CD(); + bool dataabort = !cpu->DataWrite32(addr, storeval); \ + cpu->AddCycles_CD(); \ + if (dataabort) return; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ @@ -94,10 +96,11 @@ namespace melonDS::ARMInterpreter #define A_LDR \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; cpu->DataRead32(offset, &val); \ + u32 val; bool dataabort = !cpu->DataRead32(offset, &val); \ + cpu->AddCycles_CDI(); \ + if (dataabort) return; \ val = ROR(val, ((offset&0x3)<<3)); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ if (cpu->Num==1) val &= ~0x1; \ @@ -111,10 +114,11 @@ namespace melonDS::ARMInterpreter // TODO: user mode #define A_LDR_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; cpu->DataRead32(addr, &val); \ + u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \ + cpu->AddCycles_CDI(); \ + if (dataabort) return; \ val = ROR(val, ((addr&0x3)<<3)); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->AddCycles_CDI(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ if (cpu->Num==1) val &= ~0x1; \ From 317a8c61e592e310e738381f2fccfdc81521cc27 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 4 Jun 2024 21:22:39 -0400 Subject: [PATCH 008/115] data abort handling for (almost) all (arm) instructions full list: strb, ldrb, strh, ldrd, strd, ldrh, ldrsb, ldrsh --- src/ARM.cpp | 12 ++-- src/ARM.h | 24 +++---- src/ARMInterpreter_LoadStore.cpp | 110 +++++++++++++++++-------------- src/CP15.cpp | 36 +++++----- 4 files changed, 102 insertions(+), 80 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index acf1b6e4..cf45a564 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1152,20 +1152,22 @@ u32 ARMv5::ReadMem(u32 addr, int size) } #endif -void ARMv4::DataRead8(u32 addr, u32* val) +bool ARMv4::DataRead8(u32 addr, u32* val) { *val = BusRead8(addr); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; + return true; } -void ARMv4::DataRead16(u32 addr, u32* val) +bool ARMv4::DataRead16(u32 addr, u32* val) { addr &= ~1; *val = BusRead16(addr); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; + return true; } bool ARMv4::DataRead32(u32 addr, u32* val) @@ -1187,20 +1189,22 @@ bool ARMv4::DataRead32S(u32 addr, u32* val) return true; } -void ARMv4::DataWrite8(u32 addr, u8 val) +bool ARMv4::DataWrite8(u32 addr, u8 val) { BusWrite8(addr, val); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; + return true; } -void ARMv4::DataWrite16(u32 addr, u16 val) +bool ARMv4::DataWrite16(u32 addr, u16 val) { addr &= ~1; BusWrite16(addr, val); DataRegion = addr; DataCycles = NDS.ARM7MemTimings[addr >> 15][0]; + return true; } bool ARMv4::DataWrite32(u32 addr, u32 val) diff --git a/src/ARM.h b/src/ARM.h index 67087433..f2277253 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -128,12 +128,12 @@ public: void SetupCodeMem(u32 addr); - virtual void DataRead8(u32 addr, u32* val) = 0; - virtual void DataRead16(u32 addr, u32* val) = 0; + virtual bool DataRead8(u32 addr, u32* val) = 0; + virtual bool DataRead16(u32 addr, u32* val) = 0; virtual bool DataRead32(u32 addr, u32* val) = 0; virtual bool DataRead32S(u32 addr, u32* val) = 0; - virtual void DataWrite8(u32 addr, u8 val) = 0; - virtual void DataWrite16(u32 addr, u16 val) = 0; + virtual bool DataWrite8(u32 addr, u8 val) = 0; + virtual bool DataWrite16(u32 addr, u16 val) = 0; virtual bool DataWrite32(u32 addr, u32 val) = 0; virtual bool DataWrite32S(u32 addr, u32 val) = 0; @@ -249,12 +249,12 @@ public: // all code accesses are forced nonseq 32bit u32 CodeRead32(u32 addr, bool branch); - void DataRead8(u32 addr, u32* val) override; - void DataRead16(u32 addr, u32* val) override; + bool DataRead8(u32 addr, u32* val) override; + bool DataRead16(u32 addr, u32* val) override; bool DataRead32(u32 addr, u32* val) override; bool DataRead32S(u32 addr, u32* val) override; - void DataWrite8(u32 addr, u8 val) override; - void DataWrite16(u32 addr, u16 val) override; + bool DataWrite8(u32 addr, u8 val) override; + bool DataWrite16(u32 addr, u16 val) override; bool DataWrite32(u32 addr, u32 val) override; bool DataWrite32S(u32 addr, u32 val) override; @@ -398,12 +398,12 @@ public: return BusRead32(addr); } - void DataRead8(u32 addr, u32* val) override; - void DataRead16(u32 addr, u32* val) override; + bool DataRead8(u32 addr, u32* val) override; + bool DataRead16(u32 addr, u32* val) override; bool DataRead32(u32 addr, u32* val) override; bool DataRead32S(u32 addr, u32* val) override; - void DataWrite8(u32 addr, u8 val) override; - void DataWrite16(u32 addr, u16 val) override; + bool DataWrite8(u32 addr, u8 val) override; + bool DataWrite16(u32 addr, u16 val) override; bool DataWrite32(u32 addr, u32 val) override; bool DataWrite32S(u32 addr, u32 val) override; void AddCycles_C() override; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index fe9bfd0c..67e09a7b 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -83,16 +83,18 @@ namespace melonDS::ARMInterpreter #define A_STRB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite8(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->AddCycles_CD(); + bool dataabort = !cpu->DataWrite8(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + cpu->AddCycles_CD(); \ + if (dataabort) return; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; // TODO: user mode (bit21) #define A_STRB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite8(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->AddCycles_CD(); + bool dataabort = !cpu->DataWrite8(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + cpu->AddCycles_CD(); \ + if (dataabort) return; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDR \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ @@ -131,18 +133,20 @@ namespace melonDS::ARMInterpreter #define A_LDRB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; cpu->DataRead8(offset, &val); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ cpu->AddCycles_CDI(); \ + if (dataabort) return; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \ // TODO: user mode #define A_LDRB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; cpu->DataRead8(addr, &val); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ + u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ cpu->AddCycles_CDI(); \ + if (dataabort) return; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \ @@ -229,103 +233,113 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite16(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->AddCycles_CD(); + bool dataabort = !cpu->DataWrite16(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + cpu->AddCycles_CD(); \ + if (dataabort) return; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_STRH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->DataWrite16(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->AddCycles_CD(); + bool dataabort = !cpu->DataWrite16(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + cpu->AddCycles_CD(); \ + if (dataabort) return; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; // TODO: CHECK LDRD/STRD TIMINGS!! #define A_LDRD \ if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { r--; printf("!! MISALIGNED LDRD %d\n", r+1); } \ - cpu->DataRead32 (offset , &cpu->R[r ]); \ - cpu->DataRead32S(offset+4, &cpu->R[r+1]); \ - cpu->AddCycles_CDI(); + if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ + if (!cpu->DataRead32S(offset+4, &cpu->R[r+1])) {cpu->AddCycles_CDI(); return;} \ + cpu->AddCycles_CDI(); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRD_POST \ if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { r--; printf("!! MISALIGNED LDRD_POST %d\n", r+1); } \ - cpu->DataRead32 (addr , &cpu->R[r ]); \ - cpu->DataRead32S(addr+4, &cpu->R[r+1]); \ - cpu->AddCycles_CDI(); + if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ + if (!cpu->DataRead32S(addr+4, &cpu->R[r+1])) {cpu->AddCycles_CDI(); return;} \ + cpu->AddCycles_CDI(); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRD \ if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { r--; printf("!! MISALIGNED STRD %d\n", r+1); } \ - cpu->DataWrite32 (offset , cpu->R[r ]); \ - cpu->DataWrite32S(offset+4, cpu->R[r+1]); \ - cpu->AddCycles_CD(); + bool dataabort = !cpu->DataWrite32(offset, cpu->R[r ]); /* yes, this data abort behavior is on purpose */ \ + dataabort |= !cpu->DataWrite32S (offset+4, cpu->R[r+1]); /* no, i dont understand it either */ \ + cpu->AddCycles_CD(); \ + if (dataabort) return; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_STRD_POST \ if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { r--; printf("!! MISALIGNED STRD_POST %d\n", r+1); } \ - cpu->DataWrite32 (addr , cpu->R[r ]); \ - cpu->DataWrite32S(addr+4, cpu->R[r+1]); \ - cpu->AddCycles_CD(); + bool dataabort = !cpu->DataWrite32(addr, cpu->R[r ]); \ + dataabort |= !cpu->DataWrite32S (addr+4, cpu->R[r+1]); \ + cpu->AddCycles_CD(); \ + if (dataabort) return; \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + bool dataabort = !cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ cpu->AddCycles_CDI(); \ + if (dataabort) return; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + bool dataabort = !cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ cpu->AddCycles_CDI(); \ + if (dataabort) return; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->DataRead8(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + bool dataabort = !cpu->DataRead8(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ cpu->AddCycles_CDI(); \ + if (dataabort) return; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->DataRead8(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + bool dataabort = !cpu->DataRead8(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ cpu->AddCycles_CDI(); \ + if (dataabort) return; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + bool dataabort = !cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ cpu->AddCycles_CDI(); \ + if (dataabort) return; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + bool dataabort = !cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ cpu->AddCycles_CDI(); \ + if (dataabort) return; \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \ + cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_IMPLEMENT_HD_LDRSTR(x) \ diff --git a/src/CP15.cpp b/src/CP15.cpp index b2ab9f91..857c5c90 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -807,12 +807,12 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) } -void ARMv5::DataRead8(u32 addr, u32* val) +bool ARMv5::DataRead8(u32 addr, u32* val) { if (!(PU_Map[addr>>12] & 0x01)) { DataAbort(); - return; + return false; } DataRegion = addr; @@ -821,25 +821,26 @@ void ARMv5::DataRead8(u32 addr, u32* val) { DataCycles = 1; *val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *val = *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)]; - return; + return true; } *val = BusRead8(addr); DataCycles = MemTimings[addr >> 12][1]; + return true; } -void ARMv5::DataRead16(u32 addr, u32* val) +bool ARMv5::DataRead16(u32 addr, u32* val) { if (!(PU_Map[addr>>12] & 0x01)) { DataAbort(); - return; + return false; } DataRegion = addr; @@ -850,17 +851,18 @@ void ARMv5::DataRead16(u32 addr, u32* val) { DataCycles = 1; *val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *val = *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)]; - return; + return true; } *val = BusRead16(addr); DataCycles = MemTimings[addr >> 12][1]; + return true; } bool ARMv5::DataRead32(u32 addr, u32* val) @@ -921,12 +923,12 @@ bool ARMv5::DataRead32S(u32 addr, u32* val) return true; } -void ARMv5::DataWrite8(u32 addr, u8 val) +bool ARMv5::DataWrite8(u32 addr, u8 val) { if (!(PU_Map[addr>>12] & 0x02)) { DataAbort(); - return; + return false; } DataRegion = addr; @@ -936,25 +938,26 @@ void ARMv5::DataWrite8(u32 addr, u8 val) DataCycles = 1; *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return; + return true; } BusWrite8(addr, val); DataCycles = MemTimings[addr >> 12][1]; + return true; } -void ARMv5::DataWrite16(u32 addr, u16 val) +bool ARMv5::DataWrite16(u32 addr, u16 val) { if (!(PU_Map[addr>>12] & 0x02)) { DataAbort(); - return; + return false; } DataRegion = addr; @@ -966,17 +969,18 @@ void ARMv5::DataWrite16(u32 addr, u16 val) DataCycles = 1; *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); - return; + return true; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return; + return true; } BusWrite16(addr, val); DataCycles = MemTimings[addr >> 12][1]; + return true; } bool ARMv5::DataWrite32(u32 addr, u32 val) From 1871c48849949d8700271e57c7fef7a85216d45e Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Wed, 5 Jun 2024 10:28:51 -0400 Subject: [PATCH 009/115] fix double data aborts with strd --- src/ARM.cpp | 2 +- src/ARM.h | 6 +++--- src/ARMInterpreter_LoadStore.cpp | 4 ++-- src/CP15.cpp | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index cf45a564..0d2976d2 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1217,7 +1217,7 @@ bool ARMv4::DataWrite32(u32 addr, u32 val) return true; } -bool ARMv4::DataWrite32S(u32 addr, u32 val) +bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) { addr &= ~3; diff --git a/src/ARM.h b/src/ARM.h index f2277253..1f68567c 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -135,7 +135,7 @@ public: virtual bool DataWrite8(u32 addr, u8 val) = 0; virtual bool DataWrite16(u32 addr, u16 val) = 0; virtual bool DataWrite32(u32 addr, u32 val) = 0; - virtual bool DataWrite32S(u32 addr, u32 val) = 0; + virtual bool DataWrite32S(u32 addr, u32 val, bool dataabort = false) = 0; virtual void AddCycles_C() = 0; virtual void AddCycles_CI(s32 numI) = 0; @@ -256,7 +256,7 @@ public: bool DataWrite8(u32 addr, u8 val) override; bool DataWrite16(u32 addr, u16 val) override; bool DataWrite32(u32 addr, u32 val) override; - bool DataWrite32S(u32 addr, u32 val) override; + bool DataWrite32S(u32 addr, u32 val, bool dataabort = false) override; void AddCycles_C() override { @@ -405,7 +405,7 @@ public: bool DataWrite8(u32 addr, u8 val) override; bool DataWrite16(u32 addr, u16 val) override; bool DataWrite32(u32 addr, u32 val) override; - bool DataWrite32S(u32 addr, u32 val) override; + bool DataWrite32S(u32 addr, u32 val, bool dataabort = false) override; void AddCycles_C() override; void AddCycles_CI(s32 num) override; void AddCycles_CDI() override; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 67e09a7b..d28aed0f 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -273,7 +273,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { r--; printf("!! MISALIGNED STRD %d\n", r+1); } \ bool dataabort = !cpu->DataWrite32(offset, cpu->R[r ]); /* yes, this data abort behavior is on purpose */ \ - dataabort |= !cpu->DataWrite32S (offset+4, cpu->R[r+1]); /* no, i dont understand it either */ \ + dataabort |= !cpu->DataWrite32S (offset+4, cpu->R[r+1], dataabort); /* no, i dont understand it either */ \ cpu->AddCycles_CD(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -284,7 +284,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { r--; printf("!! MISALIGNED STRD_POST %d\n", r+1); } \ bool dataabort = !cpu->DataWrite32(addr, cpu->R[r ]); \ - dataabort |= !cpu->DataWrite32S (addr+4, cpu->R[r+1]); \ + dataabort |= !cpu->DataWrite32S (addr+4, cpu->R[r+1], dataabort); \ cpu->AddCycles_CD(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; diff --git a/src/CP15.cpp b/src/CP15.cpp index 857c5c90..34c8addf 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -1014,11 +1014,11 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) return true; } -bool ARMv5::DataWrite32S(u32 addr, u32 val) +bool ARMv5::DataWrite32S(u32 addr, u32 val, bool dataabort) { if (!(PU_Map[addr>>12] & 0x02)) { - DataAbort(); + if (!dataabort) DataAbort(); return false; } From 7c3108e20f0e8ec5391df2c09bd5af99464f361f Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:31:44 -0400 Subject: [PATCH 010/115] handle swp instruction aborts --- src/ARMInterpreter_LoadStore.cpp | 34 ++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index d28aed0f..c8544a67 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -382,13 +382,16 @@ void A_SWP(ARM* cpu) u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 val; - cpu->DataRead32(base, &val); - cpu->R[(cpu->CurInstr >> 12) & 0xF] = ROR(val, 8*(base&0x3)); - - u32 numD = cpu->DataCycles; - cpu->DataWrite32(base, rm); - cpu->DataCycles += numD; - + if (cpu->DataRead32(base, &val)) + { + u32 numD = cpu->DataCycles; + if (cpu->DataWrite32(base, rm)) + { + // rd only gets updated if both read and write succeed + cpu->R[(cpu->CurInstr >> 12) & 0xF] = ROR(val, 8*(base&0x3)); + } + cpu->DataCycles += numD; + } cpu->AddCycles_CDI(); } @@ -397,12 +400,17 @@ void A_SWPB(ARM* cpu) u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF; - cpu->DataRead8(base, &cpu->R[(cpu->CurInstr >> 12) & 0xF]); - - u32 numD = cpu->DataCycles; - cpu->DataWrite8(base, rm); - cpu->DataCycles += numD; - + u32 val; + if (cpu->DataRead8(base, &val)) + { + u32 numD = cpu->DataCycles; + if (cpu->DataWrite8(base, rm)) + { + // rd only gets updated if both read and write succeed + cpu->R[(cpu->CurInstr >> 12) & 0xF] = val; + } + cpu->DataCycles += numD; + } cpu->AddCycles_CDI(); } From 13ae96b4e3540696bc7de5aeb5a4ee5f5999380a Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:32:12 -0400 Subject: [PATCH 011/115] simple thumb instructions (untested but probably right) --- src/ARMInterpreter_LoadStore.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index c8544a67..c518adfb 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -631,8 +631,8 @@ void T_LDR_REG(ARM* cpu) u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; u32 val; - cpu->DataRead32(addr, &val); - cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); + if (cpu->DataRead32(addr, &val)) + cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); cpu->AddCycles_CDI(); } @@ -657,8 +657,8 @@ void T_STRH_REG(ARM* cpu) void T_LDRSB_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); - cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; + if (cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7])) + cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI(); } @@ -674,8 +674,8 @@ void T_LDRH_REG(ARM* cpu) void T_LDRSH_REG(ARM* cpu) { u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); - cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; + if (cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7])) + cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI(); } @@ -696,8 +696,8 @@ void T_LDR_IMM(ARM* cpu) offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 val; - cpu->DataRead32(offset, &val); - cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); + if (cpu->DataRead32(offset, &val)) + cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); cpu->AddCycles_CDI(); } From d6cd18945561d7e1edecb01de5368d687372307c Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 6 Jun 2024 18:58:43 -0400 Subject: [PATCH 012/115] rework data abort handling for ldm/stm; implement thumb stmia+push --- src/ARMInterpreter_LoadStore.cpp | 106 +++++++++++++++++++------------ 1 file changed, 65 insertions(+), 41 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index c518adfb..b615e9e1 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -424,7 +424,6 @@ void A_LDM(ARM* cpu) u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; - bool dataabort = false; if (!(cpu->CurInstr & (1<<23))) // decrement { @@ -456,8 +455,7 @@ void A_LDM(ARM* cpu) if (!(first ? cpu->DataRead32 (base, &cpu->R[i]) : cpu->DataRead32S(base, &cpu->R[i]))) { - dataabort = true; - goto abortjump; + goto dataabort; } first = false; @@ -472,7 +470,7 @@ void A_LDM(ARM* cpu) if (!(first ? cpu->DataRead32 (base, &pc) : cpu->DataRead32S(base, &pc))) { - dataabort = true; + goto dataabort; } if (!preinc) base += 4; @@ -481,39 +479,46 @@ void A_LDM(ARM* cpu) pc &= ~0x1; } - abortjump: - // switch back to previous regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); - if (!dataabort) + // writeback to base + if (cpu->CurInstr & (1<<21)) { - // writeback to base - if (cpu->CurInstr & (1<<21)) - { - // post writeback - if (cpu->CurInstr & (1<<23)) - wbbase = base; + // post writeback + if (cpu->CurInstr & (1<<23)) + wbbase = base; - if (cpu->CurInstr & (1 << baseid)) + if (cpu->CurInstr & (1 << baseid)) + { + if (cpu->Num == 0) { - if (cpu->Num == 0) - { - u32 rlist = cpu->CurInstr & 0xFFFF; - if ((!(rlist & ~(1 << baseid))) || (rlist & ~((2 << baseid) - 1))) - cpu->R[baseid] = wbbase; - } + u32 rlist = cpu->CurInstr & 0xFFFF; + if ((!(rlist & ~(1 << baseid))) || (rlist & ~((2 << baseid) - 1))) + cpu->R[baseid] = wbbase; } - else - cpu->R[baseid] = wbbase; } - - // jump if pc got written - if (cpu->CurInstr & (1<<15)) - cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); + else + cpu->R[baseid] = wbbase; + } + + // jump if pc got written + if (cpu->CurInstr & (1<<15)) + cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); + + // jump here if a data abort occurred; writeback is ignored, and any jumps were aborted + if (false) + { + dataabort: + + // switch back to original set of regs + if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) + cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + + // restore original value of base in case the reg got written to + cpu->R[baseid] = oldbase; } - else cpu->R[baseid] = oldbase; // restore original value of base in case the reg got written to cpu->AddCycles_CDI(); } @@ -525,7 +530,6 @@ void A_STM(ARM* cpu) u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; - bool dataabort = false; if (!(cpu->CurInstr & (1<<23))) { @@ -571,8 +575,7 @@ void A_STM(ARM* cpu) if (!(first ? cpu->DataWrite32 (base, val) : cpu->DataWrite32S(base, val))) { - dataabort = true; - break; + goto dataabort; } first = false; @@ -584,12 +587,20 @@ void A_STM(ARM* cpu) if (cpu->CurInstr & (1<<22)) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); - if (!dataabort) + if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) + cpu->R[baseid] = base; + + // jump here if a data abort occurred + if (false) { - if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) - cpu->R[baseid] = base; + dataabort: + + if (cpu->CurInstr & (1<<22)) + cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + + // restore original value of base + cpu->R[baseid] = oldbase; } - else cpu->R[baseid] = oldbase; cpu->AddCycles_CD(); } @@ -774,14 +785,17 @@ void T_PUSH(ARM* cpu) u32 base = cpu->R[13]; base -= (nregs<<2); - cpu->R[13] = base; + u32 wbbase = base; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]); - else cpu->DataWrite32S(base, cpu->R[i]); + if (!(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i]))) + { + goto dataabort; + } first = false; base += 4; } @@ -789,10 +803,16 @@ void T_PUSH(ARM* cpu) if (cpu->CurInstr & (1<<8)) { - if (first) cpu->DataWrite32 (base, cpu->R[14]); - else cpu->DataWrite32S(base, cpu->R[14]); + if (!(first ? cpu->DataWrite32 (base, cpu->R[14]) + : cpu->DataWrite32S(base, cpu->R[14]))) + { + goto dataabort; + } } + cpu->R[13] = wbbase; + + dataabort: cpu->AddCycles_CD(); } @@ -835,8 +855,11 @@ void T_STMIA(ARM* cpu) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]); - else cpu->DataWrite32S(base, cpu->R[i]); + if (!(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i]))) + { + goto dataabort; + } first = false; base += 4; } @@ -844,6 +867,7 @@ void T_STMIA(ARM* cpu) // TODO: check "Rb included in Rlist" case cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; + dataabort: cpu->AddCycles_CD(); } From 8bc7e4591c4851a90b6f245b3c51fa2f13785a32 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 6 Jun 2024 19:05:28 -0400 Subject: [PATCH 013/115] thumb ldmia/pop data aborts --- src/ARMInterpreter_LoadStore.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index b615e9e1..144ecec5 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -825,8 +825,11 @@ void T_POP(ARM* cpu) { if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i]); - else cpu->DataRead32S(base, &cpu->R[i]); + if (!(first ? cpu->DataRead32 (base, &cpu->R[i]) + : cpu->DataRead32S(base, &cpu->R[i]))) + { + goto dataabort; + } first = false; base += 4; } @@ -835,14 +838,19 @@ void T_POP(ARM* cpu) if (cpu->CurInstr & (1<<8)) { u32 pc; - if (first) cpu->DataRead32 (base, &pc); - else cpu->DataRead32S(base, &pc); + if (!(first ? cpu->DataRead32 (base, &pc) + : cpu->DataRead32S(base, &pc))) + { + goto dataabort; + } if (cpu->Num==1) pc |= 0x1; cpu->JumpTo(pc); base += 4; } cpu->R[13] = base; + + dataabort: cpu->AddCycles_CDI(); } @@ -880,8 +888,11 @@ void T_LDMIA(ARM* cpu) { if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i]); - else cpu->DataRead32S(base, &cpu->R[i]); + if (!(first ? cpu->DataRead32 (base, &cpu->R[i]) + : cpu->DataRead32S(base, &cpu->R[i]))) + { + goto dataabort; + } first = false; base += 4; } @@ -890,6 +901,7 @@ void T_LDMIA(ARM* cpu) if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; + dataabort: cpu->AddCycles_CDI(); } From bd3611b51d9e6ccc41f305607c0cb824df106734 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 7 Jun 2024 20:43:02 -0400 Subject: [PATCH 014/115] unaligned registers with strd/ldrd raise an exception --- src/ARMInterpreter_LoadStore.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 144ecec5..9782140b 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -251,7 +251,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { r--; printf("!! MISALIGNED LDRD %d\n", r+1); } \ + if (r&1) { A_UNK(cpu); return; } \ if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ if (!cpu->DataRead32S(offset+4, &cpu->R[r+1])) {cpu->AddCycles_CDI(); return;} \ cpu->AddCycles_CDI(); \ @@ -261,7 +261,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { r--; printf("!! MISALIGNED LDRD_POST %d\n", r+1); } \ + if (r&1) { A_UNK(cpu); return; } \ if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ if (!cpu->DataRead32S(addr+4, &cpu->R[r+1])) {cpu->AddCycles_CDI(); return;} \ cpu->AddCycles_CDI(); \ @@ -271,7 +271,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { r--; printf("!! MISALIGNED STRD %d\n", r+1); } \ + if (r&1) { A_UNK(cpu); return; } \ bool dataabort = !cpu->DataWrite32(offset, cpu->R[r ]); /* yes, this data abort behavior is on purpose */ \ dataabort |= !cpu->DataWrite32S (offset+4, cpu->R[r+1], dataabort); /* no, i dont understand it either */ \ cpu->AddCycles_CD(); \ @@ -282,7 +282,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { r--; printf("!! MISALIGNED STRD_POST %d\n", r+1); } \ + if (r&1) { A_UNK(cpu); return; } \ bool dataabort = !cpu->DataWrite32(addr, cpu->R[r ]); \ dataabort |= !cpu->DataWrite32S (addr+4, cpu->R[r+1], dataabort); \ cpu->AddCycles_CD(); \ From 2b0ed459e125af0df9665937115d0bfff6d7aaf8 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 7 Jun 2024 23:46:49 -0400 Subject: [PATCH 015/115] fully implement r15 stores being +12 of addr --- src/ARMInterpreter_LoadStore.cpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 9782140b..cfa8e3d4 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -251,7 +251,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { A_UNK(cpu); return; } \ + if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ if (!cpu->DataRead32S(offset+4, &cpu->R[r+1])) {cpu->AddCycles_CDI(); return;} \ cpu->AddCycles_CDI(); \ @@ -261,7 +261,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { A_UNK(cpu); return; } \ + if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ if (!cpu->DataRead32S(addr+4, &cpu->R[r+1])) {cpu->AddCycles_CDI(); return;} \ cpu->AddCycles_CDI(); \ @@ -271,9 +271,10 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(offset, cpu->R[r ]); /* yes, this data abort behavior is on purpose */ \ - dataabort |= !cpu->DataWrite32S (offset+4, cpu->R[r+1], dataabort); /* no, i dont understand it either */ \ + if (r&1) { A_UNK(cpu); return; } /* checkme */ \ + bool dataabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ + u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ + dataabort |= !cpu->DataWrite32S (offset+4, storeval, dataabort); /* no, i dont understand it either */ \ cpu->AddCycles_CD(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -282,9 +283,10 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(addr, cpu->R[r ]); \ - dataabort |= !cpu->DataWrite32S (addr+4, cpu->R[r+1], dataabort); \ + if (r&1) { A_UNK(cpu); return; } /* checkme */ \ + bool dataabort = !cpu->DataWrite32(addr, cpu->R[r]); \ + u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ + dataabort |= !cpu->DataWrite32S (addr+4, storeval, dataabort); \ cpu->AddCycles_CD(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -380,6 +382,7 @@ void A_SWP(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 rm = cpu->R[cpu->CurInstr & 0xF]; + if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; if (cpu->DataRead32(base, &val)) @@ -399,6 +402,7 @@ void A_SWPB(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF; + if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; if (cpu->DataRead8(base, &val)) @@ -572,6 +576,8 @@ void A_STM(ARM* cpu) } else val = cpu->R[i]; + if (i == 15) val+=4; + if (!(first ? cpu->DataWrite32 (base, val) : cpu->DataWrite32S(base, val))) { From 73507621f5b1460191eaf0242978859b5aad9c45 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 7 Jun 2024 23:50:31 -0400 Subject: [PATCH 016/115] idk why it took me two tries to get these instructions to work properly --- src/ARMInterpreter_LoadStore.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index cfa8e3d4..1d8595ab 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -83,7 +83,9 @@ namespace melonDS::ARMInterpreter #define A_STRB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataWrite8(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ + bool dataabort = !cpu->DataWrite8(offset, storeval); \ cpu->AddCycles_CD(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -91,7 +93,9 @@ namespace melonDS::ARMInterpreter // TODO: user mode (bit21) #define A_STRB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataWrite8(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ + bool dataabort = !cpu->DataWrite8(addr, storeval); \ cpu->AddCycles_CD(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -233,14 +237,18 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataWrite16(offset, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ + bool dataabort = !cpu->DataWrite16(offset, storeval); \ cpu->AddCycles_CD(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_STRH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataWrite16(addr, cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ + bool dataabort = !cpu->DataWrite16(addr, storeval); \ cpu->AddCycles_CD(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; From 0c887202e7622d2474945fee2e23a059261f1efa Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 8 Jun 2024 10:40:23 -0400 Subject: [PATCH 017/115] fix some more instructions? --- src/ARMInterpreter_LoadStore.cpp | 64 +++++++++++++++++++------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 1d8595ab..aa4a90eb 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -141,8 +141,8 @@ namespace melonDS::ARMInterpreter cpu->AddCycles_CDI(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ // TODO: user mode #define A_LDRB_POST \ @@ -151,8 +151,8 @@ namespace melonDS::ARMInterpreter cpu->AddCycles_CDI(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRB PC %08X\n", cpu->R[15]); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ @@ -261,7 +261,9 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ - if (!cpu->DataRead32S(offset+4, &cpu->R[r+1])) {cpu->AddCycles_CDI(); return;} \ + u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ + if (r == 14) A_UNK(cpu); /* hang??? */ \ + else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -271,7 +273,9 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ - if (!cpu->DataRead32S(addr+4, &cpu->R[r+1])) {cpu->AddCycles_CDI(); return;} \ + u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ + if (r == 14) A_UNK(cpu); /*hang??? */ \ + else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -301,54 +305,60 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRH PC %08X\n", cpu->R[15]); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataRead8(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \ + val = (s32)(s8)val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); /* hang??? */ \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataRead8(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s8)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSB PC %08X\n", cpu->R[15]); \ + val = (s32)(s8)val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); /* hang??? */ \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataRead16(offset, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \ + val = (s32)(s16)val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - bool dataabort = !cpu->DataRead16(addr, &cpu->R[(cpu->CurInstr>>12) & 0xF]); \ + u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = (s32)(s16)cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) printf("!! LDRSH PC %08X\n", cpu->R[15]); \ + val = (s32)(s16)val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -398,8 +408,9 @@ void A_SWP(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite32(base, rm)) { - // rd only gets updated if both read and write succeed - cpu->R[(cpu->CurInstr >> 12) & 0xF] = ROR(val, 8*(base&0x3)); + // rd only gets updated if both read and write succeed, and if rd isn't r15 + u32 rd = (cpu->CurInstr >> 12) & 0xF; + if (rd != 15) cpu->R[rd] = ROR(val, 8*(base&0x3)); } cpu->DataCycles += numD; } @@ -418,8 +429,9 @@ void A_SWPB(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite8(base, rm)) { - // rd only gets updated if both read and write succeed - cpu->R[(cpu->CurInstr >> 12) & 0xF] = val; + // rd only gets updated if both read and write succeed, and if rd isn't r15 + u32 rd = (cpu->CurInstr >> 12) & 0xF; + if (rd != 15) cpu->R[rd] = val; } cpu->DataCycles += numD; } From 8191f92bb639a51f21ca2680113574a1970a9ccc Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 8 Jun 2024 10:42:19 -0400 Subject: [PATCH 018/115] mcr is also affected --- src/ARMInterpreter.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index ff73e230..6da76b16 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -216,10 +216,12 @@ void A_MCR(ARM* cpu) u32 cn = (cpu->CurInstr >> 16) & 0xF; u32 cm = cpu->CurInstr & 0xF; u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; + u32 val = cpu->R[(cpu->CurInstr>>12)&0xF]; + if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4; if (cpu->Num==0 && cp==15) { - ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, cpu->R[(cpu->CurInstr>>12)&0xF]); + ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, val); } else if (cpu->Num==1 && cp==14) { From 5f97dfc1ab0d21726acbb425081bd4579a510d1c Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 8 Jun 2024 10:53:22 -0400 Subject: [PATCH 019/115] fix bits fixed to 0 for pu region sizing being set --- src/CP15.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CP15.cpp b/src/CP15.cpp index 34c8addf..7b11696b 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -579,7 +579,7 @@ void ARMv5::CP15Write(u32 id, u32 val) case 0x670: case 0x671: char log_output[1024]; - PU_Region[(id >> 4) & 0xF] = val; + PU_Region[(id >> 4) & 0xF] = val & ~(0x3F<<6); std::snprintf(log_output, sizeof(log_output), From 3699768ac9657426da6b012ce93714e3823a24b8 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 8 Jun 2024 13:53:12 -0400 Subject: [PATCH 020/115] most cpsr bits can't actually be updated (or at least can't be read?) --- src/ARMInterpreter.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 6da76b16..d6c3a488 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -101,9 +101,9 @@ void A_MSR_IMM(ARM* cpu) u32 mask = 0; if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF; - if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; - if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; - if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000; + //if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; + //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; + if (cpu->CurInstr & (1<<19)) mask |= (cpu->Num ? 0xF0000000 /* checkme */ : 0xF8000000); if (!(cpu->CurInstr & (1<<22))) mask &= 0xFFFFFFDF; @@ -154,9 +154,9 @@ void A_MSR_REG(ARM* cpu) u32 mask = 0; if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF; - if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; - if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; - if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000; + //if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; + //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; + if (cpu->CurInstr & (1<<19)) mask |= (cpu->Num ? 0xF0000000 /* checkme */ : 0xF8000000); if (!(cpu->CurInstr & (1<<22))) mask &= 0xFFFFFFDF; From 659763f903b3517e7bbb6f76c2eadf1232cf93eb Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 8 Jun 2024 16:15:02 -0400 Subject: [PATCH 021/115] clarification --- src/ARMInterpreter_LoadStore.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index aa4a90eb..19136cce 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -117,7 +117,7 @@ namespace melonDS::ARMInterpreter cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ } -// TODO: user mode +// TODO: user mode (note: ldrt w/ rd = 15 may be an undef instr) #define A_LDR_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \ @@ -144,7 +144,7 @@ namespace melonDS::ARMInterpreter if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ -// TODO: user mode +// TODO: user mode (note: ldrbt w/ rd = 15 may be an undef instr) #define A_LDRB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ @@ -262,7 +262,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) A_UNK(cpu); /* hang??? */ \ + if (r == 14) A_UNK(cpu); /* checkme */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -274,7 +274,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) A_UNK(cpu); /*hang??? */ \ + if (r == 14) A_UNK(cpu); /* checkme */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -327,7 +327,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); /* hang??? */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -337,7 +337,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); /* hang??? */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -347,7 +347,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); /* checkme */ \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -357,7 +357,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); /* checkme */ \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; From 849d4e51acd9b934ee3184e521acf1441d75c66d Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 8 Jun 2024 22:12:44 -0400 Subject: [PATCH 022/115] imma be real, i have no idea what is going on here --- src/ARM.cpp | 38 ++++++++++++++++++++++++++++++++ src/ARM.h | 13 ++++++++--- src/ARMInterpreter_LoadStore.cpp | 24 ++++++++++---------- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 0d2976d2..a9c2d124 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -177,6 +177,8 @@ void ARM::Reset() ExceptionBase = Num ? 0x00000000 : 0xFFFF0000; + BuggyJump = 0; + CodeMem.Mem = NULL; #ifdef JIT_ENABLED @@ -284,6 +286,32 @@ void ARM::SetupCodeMem(u32 addr) } } +void ARMv5::BuggedJumpTo32(const u32 addr) +{ + if (BuggyJump == 1) + { + BuggyJump = 2; + JumpTo(addr); + } + else + { + JumpTo(addr & ~0x1); + } +} + +void ARMv5::BuggedJumpTo(const u32 addr) +{ + if ((BuggyJump == 0) && (addr & 0x3)) + { + BuggyJump = 1; + PrefetchAbort(); // checkme + } + else + { + JumpTo(addr); + } +} + void ARMv5::JumpTo(u32 addr, bool restorecpsr) { if (restorecpsr) @@ -352,6 +380,16 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) NDS.MonitorARM9Jump(addr); } +void ARMv4::BuggedJumpTo32(const u32 addr) +{ + JumpTo(addr); // todo +} + +void ARMv4::BuggedJumpTo(const u32 addr) +{ + JumpTo(addr); // todo +} + void ARMv4::JumpTo(u32 addr, bool restorecpsr) { if (restorecpsr) diff --git a/src/ARM.h b/src/ARM.h index 1f68567c..9cda0be1 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -64,7 +64,9 @@ public: virtual void DoSavestate(Savestate* file); virtual void FillPipeline() = 0; - + + virtual void BuggedJumpTo32(const u32 addr) = 0; + virtual void BuggedJumpTo(const u32 addr) = 0; virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0; void RestoreCPSR(); @@ -173,6 +175,7 @@ public: u32 R_UND[3]; u32 CurInstr; u32 NextInstr[2]; + u32 BuggyJump; u32 ExceptionBase; @@ -235,7 +238,9 @@ public: void UpdateRegionTimings(u32 addrstart, u32 addrend); void FillPipeline() override; - + + void BuggedJumpTo32(const u32 addr) override; + void BuggedJumpTo(const u32 addr) override; void JumpTo(u32 addr, bool restorecpsr = false) override; void PrefetchAbort(); @@ -380,7 +385,9 @@ public: ARMv4(melonDS::NDS& nds, std::optional gdb, bool jit); void FillPipeline() override; - + + void BuggedJumpTo32(const u32 addr) override; + void BuggedJumpTo(const u32 addr) override; void JumpTo(u32 addr, bool restorecpsr = false) override; void Execute() override; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 19136cce..b5a3ee63 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -141,8 +141,8 @@ namespace melonDS::ARMInterpreter cpu->AddCycles_CDI(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; // TODO: user mode (note: ldrbt w/ rd = 15 may be an undef instr) #define A_LDRB_POST \ @@ -151,8 +151,8 @@ namespace melonDS::ARMInterpreter cpu->AddCycles_CDI(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; @@ -262,7 +262,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) A_UNK(cpu); /* checkme */ \ + if (r == 14) cpu->BuggedJumpTo32(val); \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -274,7 +274,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) A_UNK(cpu); /* checkme */ \ + if (r == 14) cpu->BuggedJumpTo32(val); \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -308,7 +308,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -317,7 +317,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -327,7 +327,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -337,7 +337,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -347,7 +347,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); /* checkme */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -357,7 +357,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) A_UNK(cpu); /* checkme */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; From b846c6f100b53fc9b546e5a0acf870734e5f1e07 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 8 Jun 2024 22:17:07 -0400 Subject: [PATCH 023/115] remove out of date comments --- src/ARMInterpreter_LoadStore.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index b5a3ee63..2f2a7912 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -117,7 +117,7 @@ namespace melonDS::ARMInterpreter cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ } -// TODO: user mode (note: ldrt w/ rd = 15 may be an undef instr) +// TODO: user mode #define A_LDR_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \ @@ -144,7 +144,7 @@ namespace melonDS::ARMInterpreter if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; -// TODO: user mode (note: ldrbt w/ rd = 15 may be an undef instr) +// TODO: user mode #define A_LDRB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ From be60c68aeb66e918686c5a9d0d5729af79c5cf6c Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 9 Jun 2024 07:25:42 -0400 Subject: [PATCH 024/115] more weirdness --- src/ARM.cpp | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index a9c2d124..6ec14682 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -288,11 +288,19 @@ void ARM::SetupCodeMem(u32 addr) void ARMv5::BuggedJumpTo32(const u32 addr) { + // ldrd to pc + // behavior seems to be related to if a bugged 8/16 bit write has prefetch aborted (does any p.abort work?) + // switching to thumb mode only seems to work the first time after one of the above aborts? + // writing to pc seems to fail entirely if an abort hasn't occured and thumb interworking is in v4 mode if (BuggyJump == 1) { BuggyJump = 2; JumpTo(addr); } + else if ((BuggyJump == 0) && (CP15Control & (1<<15))) + { + return; // checkme + } else { JumpTo(addr & ~0x1); @@ -301,15 +309,27 @@ void ARMv5::BuggedJumpTo32(const u32 addr) void ARMv5::BuggedJumpTo(const u32 addr) { - if ((BuggyJump == 0) && (addr & 0x3)) + // 16 and 8 bit loads (signed instructions included) to pc + // if they're misaligned they'll prefetch abort + // but they can only prefetch abort once, every time afterwards will succeed (more testing needed) + // if the lsb is set they will try to switch to thumb state, though it'll fail if they haven't prefetch aborted yet + // they work as expected if thumb interwork is set to v4 mode + if (BuggyJump == 0) { - BuggyJump = 1; - PrefetchAbort(); // checkme - } - else - { - JumpTo(addr); + if (CP15Control & (1<<15)) + { + JumpTo(addr & ~1); + return; + } + else if (addr & 0x3) + { + if (addr & 0x1) CPSR |= 0x20; + BuggyJump = 1; + PrefetchAbort(); + return; + } } + JumpTo(addr); } void ARMv5::JumpTo(u32 addr, bool restorecpsr) @@ -382,12 +402,12 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) void ARMv4::BuggedJumpTo32(const u32 addr) { - JumpTo(addr); // todo + JumpTo(addr & ~1); // todo } void ARMv4::BuggedJumpTo(const u32 addr) { - JumpTo(addr); // todo + JumpTo(addr & ~1); // todo } void ARMv4::JumpTo(u32 addr, bool restorecpsr) From b90d5c23200d5fda12c11a9e06de98b426c06332 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 9 Jun 2024 12:18:31 -0400 Subject: [PATCH 025/115] what the actual F*** is going on --- src/ARM.cpp | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 6ec14682..94f2debf 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -290,20 +290,20 @@ void ARMv5::BuggedJumpTo32(const u32 addr) { // ldrd to pc // behavior seems to be related to if a bugged 8/16 bit write has prefetch aborted (does any p.abort work?) - // switching to thumb mode only seems to work the first time after one of the above aborts? - // writing to pc seems to fail entirely if an abort hasn't occured and thumb interworking is in v4 mode + // switching to thumb mode only seems to work the first time an ldrd pc is executed after one of the above aborts? + // also it can restore cpsr but only if the PU is disabled (?????????????????????????????????????) if (BuggyJump == 1) { BuggyJump = 2; - JumpTo(addr); - } - else if ((BuggyJump == 0) && (CP15Control & (1<<15))) - { - return; // checkme + + if (CP15Control & (1<<15)) + JumpTo(addr & ~0x1, !(CP15Control & 1)); + else + JumpTo(addr, !(CP15Control & 1)); } else { - JumpTo(addr & ~0x1); + JumpTo(addr & ~0x1, !(CP15Control & 1)); } } @@ -313,23 +313,18 @@ void ARMv5::BuggedJumpTo(const u32 addr) // if they're misaligned they'll prefetch abort // but they can only prefetch abort once, every time afterwards will succeed (more testing needed) // if the lsb is set they will try to switch to thumb state, though it'll fail if they haven't prefetch aborted yet - // they work as expected if thumb interwork is set to v4 mode - if (BuggyJump == 0) + if ((BuggyJump == 0) && (addr & 0x3)) { - if (CP15Control & (1<<15)) - { - JumpTo(addr & ~1); - return; - } - else if (addr & 0x3) - { - if (addr & 0x1) CPSR |= 0x20; - BuggyJump = 1; - PrefetchAbort(); - return; - } + if (addr & 0x1) CPSR |= 0x20; + BuggyJump = 1; + PrefetchAbort(); + return; } - JumpTo(addr); + + if (CP15Control & (1<<15)) + JumpTo(addr & ~0x1); + else + JumpTo(addr); } void ARMv5::JumpTo(u32 addr, bool restorecpsr) From ae0824fdd35e282bf6a6b5787f6585f21eda1ae7 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 9 Jun 2024 19:10:43 -0400 Subject: [PATCH 026/115] it all makes sense now... --- src/ARM.cpp | 71 +++++++++++--------------------- src/ARM.h | 9 ++-- src/ARMInterpreter_LoadStore.cpp | 20 ++++----- 3 files changed, 36 insertions(+), 64 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 94f2debf..7bfb95a2 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -286,47 +286,6 @@ void ARM::SetupCodeMem(u32 addr) } } -void ARMv5::BuggedJumpTo32(const u32 addr) -{ - // ldrd to pc - // behavior seems to be related to if a bugged 8/16 bit write has prefetch aborted (does any p.abort work?) - // switching to thumb mode only seems to work the first time an ldrd pc is executed after one of the above aborts? - // also it can restore cpsr but only if the PU is disabled (?????????????????????????????????????) - if (BuggyJump == 1) - { - BuggyJump = 2; - - if (CP15Control & (1<<15)) - JumpTo(addr & ~0x1, !(CP15Control & 1)); - else - JumpTo(addr, !(CP15Control & 1)); - } - else - { - JumpTo(addr & ~0x1, !(CP15Control & 1)); - } -} - -void ARMv5::BuggedJumpTo(const u32 addr) -{ - // 16 and 8 bit loads (signed instructions included) to pc - // if they're misaligned they'll prefetch abort - // but they can only prefetch abort once, every time afterwards will succeed (more testing needed) - // if the lsb is set they will try to switch to thumb state, though it'll fail if they haven't prefetch aborted yet - if ((BuggyJump == 0) && (addr & 0x3)) - { - if (addr & 0x1) CPSR |= 0x20; - BuggyJump = 1; - PrefetchAbort(); - return; - } - - if (CP15Control & (1<<15)) - JumpTo(addr & ~0x1); - else - JumpTo(addr); -} - void ARMv5::JumpTo(u32 addr, bool restorecpsr) { if (restorecpsr) @@ -395,14 +354,25 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) NDS.MonitorARM9Jump(addr); } -void ARMv4::BuggedJumpTo32(const u32 addr) +void ARMv5::JumpTo8_16Bit(const u32 addr) { - JumpTo(addr & ~1); // todo -} - -void ARMv4::BuggedJumpTo(const u32 addr) -{ - JumpTo(addr & ~1); // todo + // 8 and 16 loads (signed included) to pc + if (!(CP15Control & 0x1)) + { + // if the pu is disabled it behaves like a normal jump + JumpTo((CP15Control & (1<<15)) ? (addr & ~0x1) : addr); + } + else + { + if (addr & 0x3) + { + // if the pu is enabled it will always prefetch abort if not word aligned + // although it will still attempt (and fail) to enter thumb mode if enabled + if ((addr & 0x1) && !(CP15Control & (1<<15))) CPSR |= 0x20; + PrefetchAbort(); + } + else JumpTo(addr); + } } void ARMv4::JumpTo(u32 addr, bool restorecpsr) @@ -449,6 +419,11 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) } } +void ARMv4::JumpTo8_16Bit(const u32 addr) +{ + JumpTo(addr & ~1); // checkme? +} + void ARM::RestoreCPSR() { u32 oldcpsr = CPSR; diff --git a/src/ARM.h b/src/ARM.h index 9cda0be1..9b0511a3 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -65,9 +65,8 @@ public: virtual void FillPipeline() = 0; - virtual void BuggedJumpTo32(const u32 addr) = 0; - virtual void BuggedJumpTo(const u32 addr) = 0; virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0; + virtual void JumpTo8_16Bit(u32 addr) = 0; void RestoreCPSR(); void Halt(u32 halt) @@ -239,9 +238,8 @@ public: void FillPipeline() override; - void BuggedJumpTo32(const u32 addr) override; - void BuggedJumpTo(const u32 addr) override; void JumpTo(u32 addr, bool restorecpsr = false) override; + void JumpTo8_16Bit(const u32 addr) override; void PrefetchAbort(); void DataAbort(); @@ -386,9 +384,8 @@ public: void FillPipeline() override; - void BuggedJumpTo32(const u32 addr) override; - void BuggedJumpTo(const u32 addr) override; void JumpTo(u32 addr, bool restorecpsr = false) override; + void JumpTo8_16Bit(const u32 addr) override; void Execute() override; #ifdef JIT_ENABLED diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 2f2a7912..8c96967e 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -141,7 +141,7 @@ namespace melonDS::ARMInterpreter cpu->AddCycles_CDI(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; // TODO: user mode @@ -151,7 +151,7 @@ namespace melonDS::ARMInterpreter cpu->AddCycles_CDI(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; @@ -262,7 +262,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) cpu->BuggedJumpTo32(val); \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), true); /* restores cpsr for some reason? */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -274,7 +274,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) cpu->BuggedJumpTo32(val); \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), true); /* restores cpsr for some reason? */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -308,7 +308,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -317,7 +317,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -327,7 +327,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -337,7 +337,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -347,7 +347,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -357,7 +357,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->BuggedJumpTo(val); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; From ca04710debd4c4a62fa5333c496b22ceadd0cf8c Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 9 Jun 2024 22:31:10 -0400 Subject: [PATCH 027/115] ldrd is just ldm --- src/ARMInterpreter_LoadStore.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 8c96967e..1f43868f 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -262,7 +262,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), true); /* restores cpsr for some reason? */ \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr due to shared ldm dna */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -274,7 +274,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } /* checkme */ \ if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), true); /* restores cpsr for some reason? */ \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr due to shared ldm dna */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; From 3ddccde5b907fa7e379bd5296322858a74bc67ee Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 10 Jun 2024 13:10:42 -0400 Subject: [PATCH 028/115] verified also remove no longer needed variable --- src/ARM.cpp | 2 -- src/ARM.h | 7 +++---- src/ARMInterpreter_LoadStore.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 7bfb95a2..906a243e 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -177,8 +177,6 @@ void ARM::Reset() ExceptionBase = Num ? 0x00000000 : 0xFFFF0000; - BuggyJump = 0; - CodeMem.Mem = NULL; #ifdef JIT_ENABLED diff --git a/src/ARM.h b/src/ARM.h index 9b0511a3..7c5bb671 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -64,7 +64,7 @@ public: virtual void DoSavestate(Savestate* file); virtual void FillPipeline() = 0; - + virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0; virtual void JumpTo8_16Bit(u32 addr) = 0; void RestoreCPSR(); @@ -174,7 +174,6 @@ public: u32 R_UND[3]; u32 CurInstr; u32 NextInstr[2]; - u32 BuggyJump; u32 ExceptionBase; @@ -237,7 +236,7 @@ public: void UpdateRegionTimings(u32 addrstart, u32 addrend); void FillPipeline() override; - + void JumpTo(u32 addr, bool restorecpsr = false) override; void JumpTo8_16Bit(const u32 addr) override; @@ -383,7 +382,7 @@ public: ARMv4(melonDS::NDS& nds, std::optional gdb, bool jit); void FillPipeline() override; - + void JumpTo(u32 addr, bool restorecpsr = false) override; void JumpTo8_16Bit(const u32 addr) override; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 1f43868f..2e841549 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -259,7 +259,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { A_UNK(cpu); return; } /* checkme */ \ + if (r&1) { A_UNK(cpu); return; } \ if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr due to shared ldm dna */ \ @@ -271,7 +271,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { A_UNK(cpu); return; } /* checkme */ \ + if (r&1) { A_UNK(cpu); return; } \ if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr due to shared ldm dna */ \ @@ -283,7 +283,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { A_UNK(cpu); return; } /* checkme */ \ + if (r&1) { A_UNK(cpu); return; } \ bool dataabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (offset+4, storeval, dataabort); /* no, i dont understand it either */ \ @@ -295,7 +295,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ - if (r&1) { A_UNK(cpu); return; } /* checkme */ \ + if (r&1) { A_UNK(cpu); return; } \ bool dataabort = !cpu->DataWrite32(addr, cpu->R[r]); \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (addr+4, storeval, dataabort); \ From 048b0b8878f610a7e71b5e944614c498b12adb44 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 10 Jun 2024 18:03:56 -0400 Subject: [PATCH 029/115] swp/swpb jumps work on the arm 7? --- src/ARMInterpreter_LoadStore.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 2e841549..7a33b8dd 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -262,7 +262,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } \ if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr due to shared ldm dna */ \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -274,7 +274,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } \ if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr due to shared ldm dna */ \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -408,9 +408,10 @@ void A_SWP(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite32(base, rm)) { - // rd only gets updated if both read and write succeed, and if rd isn't r15 + // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; if (rd != 15) cpu->R[rd] = ROR(val, 8*(base&0x3)); + else if (cpu->Num) cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1); // for some reason these jumps don't work on the arm 9? } cpu->DataCycles += numD; } @@ -429,9 +430,10 @@ void A_SWPB(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite8(base, rm)) { - // rd only gets updated if both read and write succeed, and if rd isn't r15 + // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; if (rd != 15) cpu->R[rd] = val; + else if (cpu->Num) cpu->JumpTo(val & ~1); // for some reason these jumps don't work on the arm 9? } cpu->DataCycles += numD; } From 42218106b04adb257ecc165d0e9b79a1065e65ed Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 11 Jun 2024 10:09:40 -0400 Subject: [PATCH 030/115] verify writable msr bits --- src/ARMInterpreter.cpp | 12 ++++++------ src/ARMInterpreter_LoadStore.cpp | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index d6c3a488..5a09d210 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -101,9 +101,9 @@ void A_MSR_IMM(ARM* cpu) u32 mask = 0; if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF; - //if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; - //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; - if (cpu->CurInstr & (1<<19)) mask |= (cpu->Num ? 0xF0000000 /* checkme */ : 0xF8000000); + //if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; // unused by arm 7 & 9 + //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9 + if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000); if (!(cpu->CurInstr & (1<<22))) mask &= 0xFFFFFFDF; @@ -154,9 +154,9 @@ void A_MSR_REG(ARM* cpu) u32 mask = 0; if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF; - //if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; - //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; - if (cpu->CurInstr & (1<<19)) mask |= (cpu->Num ? 0xF0000000 /* checkme */ : 0xF8000000); + //if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; // unused by arm 7 & 9 + //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9 + if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000); if (!(cpu->CurInstr & (1<<22))) mask &= 0xFFFFFFDF; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 7a33b8dd..4e705aed 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -411,7 +411,7 @@ void A_SWP(ARM* cpu) // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; if (rd != 15) cpu->R[rd] = ROR(val, 8*(base&0x3)); - else if (cpu->Num) cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1); // for some reason these jumps don't work on the arm 9? + else if (cpu->Num==1) cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1); // for some reason these jumps don't work on the arm 9? } cpu->DataCycles += numD; } @@ -433,7 +433,7 @@ void A_SWPB(ARM* cpu) // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; if (rd != 15) cpu->R[rd] = val; - else if (cpu->Num) cpu->JumpTo(val & ~1); // for some reason these jumps don't work on the arm 9? + else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't work on the arm 9? } cpu->DataCycles += numD; } From 5a174a2ce38c40b317231428418ca4f67e4218aa Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 14 Jun 2024 00:51:55 -0400 Subject: [PATCH 031/115] track interlock cycles for load instructions --- src/ARM.cpp | 3 + src/ARM.h | 31 +++++++ src/ARMInterpreter_LoadStore.cpp | 142 ++++++++++++++++++++++++------- 3 files changed, 143 insertions(+), 33 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 906a243e..bac57879 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1314,6 +1314,9 @@ void ARMv4::AddCycles_CD() Cycles += numC + numD; } } + u64 ARMv5::Timestamp() { return NDS.ARM9Timestamp; } + + u64 ARMv4::Timestamp() { return NDS.ARM7Timestamp; } u8 ARMv5::BusRead8(u32 addr) { diff --git a/src/ARM.h b/src/ARM.h index 7c5bb671..9fb48930 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -143,6 +143,24 @@ public: virtual void AddCycles_CDI() = 0; virtual void AddCycles_CD() = 0; + inline void AddCycles_L(const u8 reg1) + { + Cycles += InterlockTimestamp[reg1]; + } + + inline void AddCycles_L(const u8 reg1, const u8 reg2) + { + Cycles += std::max(InterlockTimestamp[reg1], InterlockTimestamp[reg2]); + } + + // Must be called after all of an instruction's cycles are calculated!!! + inline void SetCycles_L(const u8 reg, const u8 cycles, const u8 type) + { + InterlockTimestamp[reg] = cycles + Timestamp() + Cycles; + } + + virtual u64 Timestamp() = 0; + void CheckGdbIncoming(); u32 Num; @@ -179,6 +197,15 @@ public: MemRegion CodeMem; + enum InterlockType + { + ILT_Norm = 0, + ILT_Mul = 1, + }; + + u8 InterlockType[16]; + u64 InterlockTimestamp[16]; + #ifdef JIT_ENABLED u32 FastBlockLookupStart, FastBlockLookupSize; u64* FastBlockLookup; @@ -299,6 +326,8 @@ public: // Cycles += numC + numD; } + u64 Timestamp() override; + void GetCodeMemRegion(u32 addr, MemRegion* region); void CP15Reset(); @@ -413,6 +442,8 @@ public: void AddCycles_CI(s32 num) override; void AddCycles_CDI() override; void AddCycles_CD() override; + + u64 Timestamp() override; protected: u8 BusRead8(u32 addr) override; u16 BusRead16(u32 addr) override; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 4e705aed..4e93c749 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -115,6 +115,7 @@ namespace melonDS::ARMInterpreter else \ { \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (offset & 3) ? 2 : 1, cpu->ILT_Norm); \ } // TODO: user mode @@ -133,6 +134,7 @@ namespace melonDS::ARMInterpreter else \ { \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (addr & 3) ? 2 : 1, cpu->ILT_Norm); \ } #define A_LDRB \ @@ -141,8 +143,13 @@ namespace melonDS::ARMInterpreter cpu->AddCycles_CDI(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + cpu->JumpTo8_16Bit(val); \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ + } // TODO: user mode #define A_LDRB_POST \ @@ -151,8 +158,13 @@ namespace melonDS::ARMInterpreter cpu->AddCycles_CDI(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + cpu->JumpTo8_16Bit(val); \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ + } @@ -260,23 +272,35 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ - u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ - else cpu->R[r+1] = val; \ + if (!cpu->DataRead32 (offset, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \ + u32 val; bool dataabort = !cpu->DataRead32S(offset+4, &val); \ cpu->AddCycles_CDI(); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + if (dataabort) return; \ + if (r == 14) \ + cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ + else \ + { \ + cpu->R[r+1] = val; \ + cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \ + } \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ #define A_LDRD_POST \ if (cpu->Num != 0) return; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ - u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ - if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ - else cpu->R[r+1] = val; \ + if (!cpu->DataRead32 (addr, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \ + u32 val; bool dataabort = !cpu->DataRead32S(addr+4, &val); \ cpu->AddCycles_CDI(); \ + if (dataabort) return; \ + if (r == 14) \ + cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ + else \ + { \ + cpu->R[r+1] = val; \ + cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \ + } \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRD \ @@ -308,8 +332,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + cpu->JumpTo8_16Bit(val); \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ + } \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRH_POST \ @@ -317,8 +346,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + cpu->JumpTo8_16Bit(val); \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ + } \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSB \ @@ -327,8 +361,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + cpu->JumpTo8_16Bit(val); \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ + } \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSB_POST \ @@ -337,8 +376,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + cpu->JumpTo8_16Bit(val); \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ + } \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSH \ @@ -347,8 +391,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + cpu->JumpTo8_16Bit(val); \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ + } \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSH_POST \ @@ -357,8 +406,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + if (((cpu->CurInstr>>12) & 0xF) == 15) \ + cpu->JumpTo8_16Bit(val); \ + else \ + { \ + cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ + cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ + } \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -408,14 +462,21 @@ void A_SWP(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite32(base, rm)) { + cpu->AddCycles_CDI(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; - if (rd != 15) cpu->R[rd] = ROR(val, 8*(base&0x3)); - else if (cpu->Num==1) cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1); // for some reason these jumps don't work on the arm 9? + if (rd != 15) + { + cpu->R[rd] = ROR(val, 8*(base&0x3)); + cpu->SetCycles_L(rd, 1, cpu->ILT_Norm); // TODO: it adds an extra interlock cycle when doing a misaligned load from a non-itcm address + } + else if (cpu->Num==1) // for some reason these jumps don't work on the arm 9? + cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1, cpu->ILT_Norm); } + else cpu->AddCycles_CDI(); cpu->DataCycles += numD; } - cpu->AddCycles_CDI(); + else cpu->AddCycles_CDI(); } void A_SWPB(ARM* cpu) @@ -430,14 +491,21 @@ void A_SWPB(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite8(base, rm)) { + cpu->AddCycles_CDI(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; - if (rd != 15) cpu->R[rd] = val; - else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't work on the arm 9? + if (rd != 15) + { + cpu->R[rd] = val; + cpu->SetCycles_L(rd, 1, cpu->ILT_Norm); // TODO: it adds an extra interlock cycle when doing a load from a non-itcm address + } + else if (cpu->Num==1)// for some reason these jumps don't work on the arm 9? + cpu->JumpTo(val & ~1); } + else cpu->AddCycles_CDI(); cpu->DataCycles += numD; } - cpu->AddCycles_CDI(); + else cpu->AddCycles_CDI(); } @@ -450,6 +518,7 @@ void A_LDM(ARM* cpu) u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; + u8 lastreg = 0; // TODO: this doesn't support 0 reg LDMs (do those even work?) if (!(cpu->CurInstr & (1<<23))) // decrement { @@ -486,6 +555,7 @@ void A_LDM(ARM* cpu) first = false; if (!preinc) base += 4; + lastreg = i; } } @@ -498,12 +568,18 @@ void A_LDM(ARM* cpu) { goto dataabort; } + cpu->AddCycles_CDI(); if (!preinc) base += 4; if (cpu->Num == 1) pc &= ~0x1; } + else + { + cpu->AddCycles_CDI(); + cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); // TODO: THIS DOESN'T APPLY WHEN LOADING FROM ITCM + } // switch back to previous regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) @@ -537,6 +613,8 @@ void A_LDM(ARM* cpu) if (false) { dataabort: + cpu->AddCycles_CDI(); + // CHECKME: interlock shouldn't apply when it data aborts, right? // switch back to original set of regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) @@ -545,8 +623,6 @@ void A_LDM(ARM* cpu) // restore original value of base in case the reg got written to cpu->R[baseid] = oldbase; } - - cpu->AddCycles_CDI(); } void A_STM(ARM* cpu) From aa1217af0a2953dbdd3ddbe6563c6787d8013f34 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:47:42 -0400 Subject: [PATCH 032/115] track interlock cycles for the ALU --- src/ARMInterpreter_ALU.cpp | 107 +++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 46 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 315d59d0..0331aa08 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -767,12 +767,6 @@ void A_MUL(ARM* cpu) u32 res = rm * rs; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ(res & 0x80000000, - !res); - if (cpu->Num==1) cpu->SetC(0); - } u32 cycles; if (cpu->Num == 0) @@ -786,6 +780,13 @@ void A_MUL(ARM* cpu) } cpu->AddCycles_CI(cycles); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ(res & 0x80000000, + !res); + if (cpu->Num==1) cpu->SetC(0); + } + else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_MLA(ARM* cpu) @@ -797,12 +798,6 @@ void A_MLA(ARM* cpu) u32 res = (rm * rs) + rn; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ(res & 0x80000000, - !res); - if (cpu->Num==1) cpu->SetC(0); - } u32 cycles; if (cpu->Num == 0) @@ -816,6 +811,13 @@ void A_MLA(ARM* cpu) } cpu->AddCycles_CI(cycles); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ(res & 0x80000000, + !res); + if (cpu->Num==1) cpu->SetC(0); + } + else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_UMULL(ARM* cpu) @@ -827,12 +829,6 @@ void A_UMULL(ARM* cpu) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } u32 cycles; if (cpu->Num == 0) @@ -846,6 +842,13 @@ void A_UMULL(ARM* cpu) } cpu->AddCycles_CI(cycles); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } + else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_UMLAL(ARM* cpu) @@ -860,12 +863,6 @@ void A_UMLAL(ARM* cpu) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } u32 cycles; if (cpu->Num == 0) @@ -879,6 +876,13 @@ void A_UMLAL(ARM* cpu) } cpu->AddCycles_CI(cycles); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } + else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMULL(ARM* cpu) @@ -890,12 +894,6 @@ void A_SMULL(ARM* cpu) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } u32 cycles; if (cpu->Num == 0) @@ -909,6 +907,13 @@ void A_SMULL(ARM* cpu) } cpu->AddCycles_CI(cycles); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } + else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMLAL(ARM* cpu) @@ -923,12 +928,6 @@ void A_SMLAL(ARM* cpu) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } u32 cycles; if (cpu->Num == 0) @@ -940,8 +939,15 @@ void A_SMLAL(ARM* cpu) else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; } - + cpu->AddCycles_CI(cycles); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } + else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMLAxy(ARM* cpu) @@ -964,7 +970,8 @@ void A_SMLAxy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; - cpu->AddCycles_C(); // TODO: interlock?? + cpu->AddCycles_C(); + cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); } void A_SMLAWy(ARM* cpu) @@ -985,7 +992,8 @@ void A_SMLAWy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; - cpu->AddCycles_C(); // TODO: interlock?? + cpu->AddCycles_C(); + cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); } void A_SMULxy(ARM* cpu) @@ -1003,7 +1011,8 @@ void A_SMULxy(ARM* cpu) u32 res = ((s16)rm * (s16)rs); cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + cpu->AddCycles_C(); + cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); } void A_SMULWy(ARM* cpu) @@ -1019,7 +1028,8 @@ void A_SMULWy(ARM* cpu) u32 res = ((s64)(s32)rm * (s16)rs) >> 16; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + cpu->AddCycles_C(); + cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); } void A_SMLALxy(ARM* cpu) @@ -1042,7 +1052,8 @@ void A_SMLALxy(ARM* cpu) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->AddCycles_CI(1); // TODO: interlock?? + cpu->AddCycles_CI(1); + cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); } @@ -1086,7 +1097,8 @@ void A_QADD(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + cpu->AddCycles_C(); + cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); } void A_QSUB(ARM* cpu) @@ -1104,7 +1116,8 @@ void A_QSUB(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + cpu->AddCycles_C(); + cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); } void A_QDADD(ARM* cpu) @@ -1130,7 +1143,8 @@ void A_QDADD(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + cpu->AddCycles_C(); + cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); } void A_QDSUB(ARM* cpu) @@ -1156,7 +1170,8 @@ void A_QDSUB(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); // TODO: interlock?? + cpu->AddCycles_C(); + cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); } From a973c0bf5bdc983ee6547f8533ea0ff3d5c750b4 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 15 Jun 2024 16:07:36 -0400 Subject: [PATCH 033/115] initial implementation of interlock cycles --- src/ARM.cpp | 13 ++- src/ARM.h | 41 ++++++++-- src/ARMInterpreter.cpp | 4 +- src/ARMInterpreter_ALU.cpp | 134 +++++++++++++++---------------- src/ARMInterpreter_LoadStore.cpp | 112 +++++++++++++++----------- 5 files changed, 179 insertions(+), 125 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index bac57879..899fe661 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -190,6 +190,8 @@ void ARM::Reset() BreakReq = false; #endif + memset(InterlockTimestamp, 0, sizeof(InterlockTimestamp)); + // zorp JumpTo(ExceptionBase); } @@ -1314,9 +1316,16 @@ void ARMv4::AddCycles_CD() Cycles += numC + numD; } } - u64 ARMv5::Timestamp() { return NDS.ARM9Timestamp; } - u64 ARMv4::Timestamp() { return NDS.ARM7Timestamp; } +u64& ARMv5::Timestamp() +{ + return NDS.ARM9Timestamp; +} + +u64& ARMv4::Timestamp() +{ + return NDS.ARM7Timestamp; +} u8 ARMv5::BusRead8(u32 addr) { diff --git a/src/ARM.h b/src/ARM.h index 9fb48930..ff857db9 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -30,6 +30,8 @@ #include "debug/GdbStub.h" #endif +#define INTERLOCK + namespace melonDS { inline u32 ROR(u32 x, u32 n) @@ -143,23 +145,46 @@ public: virtual void AddCycles_CDI() = 0; virtual void AddCycles_CD() = 0; - inline void AddCycles_L(const u8 reg1) + inline void AddCycles_L(const u32 delay, const u32 reg1) { - Cycles += InterlockTimestamp[reg1]; + if (InterlockTimestamp[reg1] > Timestamp() + delay); + Timestamp() = InterlockTimestamp[reg1]; + } + + inline void AddCycles_L(const u32 delay, const u32 reg1, const u32 reg2) + { + u64 cycles = std::max(InterlockTimestamp[reg1], InterlockTimestamp[reg2]); + if (cycles > Timestamp() + delay) + Timestamp() = cycles; + } + + inline void AddCycles_L(const u32 delay, const u32 reg1, const u32 reg2, const u32 reg3) + { + u64 cycles = std::max(InterlockTimestamp[reg1], std::max(InterlockTimestamp[reg2], InterlockTimestamp[reg3])); + if (cycles > Timestamp() + delay) + Timestamp() = cycles; } - inline void AddCycles_L(const u8 reg1, const u8 reg2) + // fetch the value of a register while handling any interlock cycles + inline u32 GetReg(const u32 reg, const u32 delay = 0) { - Cycles += std::max(InterlockTimestamp[reg1], InterlockTimestamp[reg2]); +#ifdef INTERLOCK + if (InterlockTimestamp[reg] > (Timestamp() + delay)) + Timestamp() = InterlockTimestamp[reg] - delay; +#endif + return R[reg]; } // Must be called after all of an instruction's cycles are calculated!!! - inline void SetCycles_L(const u8 reg, const u8 cycles, const u8 type) + inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) { +#ifdef INTERLOCK InterlockTimestamp[reg] = cycles + Timestamp() + Cycles; + //InterlockType[reg] = type; +#endif } - virtual u64 Timestamp() = 0; + virtual u64& Timestamp() = 0; void CheckGdbIncoming(); @@ -326,7 +351,7 @@ public: // Cycles += numC + numD; } - u64 Timestamp() override; + u64& Timestamp() override; void GetCodeMemRegion(u32 addr, MemRegion* region); @@ -443,7 +468,7 @@ public: void AddCycles_CDI() override; void AddCycles_CD() override; - u64 Timestamp() override; + u64& Timestamp() override; protected: u8 BusRead8(u32 addr) override; u16 BusRead16(u32 addr) override; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 5a09d210..5621876a 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -163,7 +163,7 @@ void A_MSR_REG(ARM* cpu) if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00; - u32 val = cpu->R[cpu->CurInstr & 0xF]; + u32 val = cpu->GetReg(cpu->CurInstr & 0xF, 1); // bit4 is forced to 1 val |= 0x00000010; @@ -216,7 +216,7 @@ void A_MCR(ARM* cpu) u32 cn = (cpu->CurInstr >> 16) & 0xF; u32 cm = cpu->CurInstr & 0xF; u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; - u32 val = cpu->R[(cpu->CurInstr>>12)&0xF]; + u32 val = cpu->GetReg((cpu->CurInstr>>12)&0xF); if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4; if (cpu->Num==0 && cp==15) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 0331aa08..ac18872b 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -160,14 +160,14 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry) cpu->SetC(b & 0x80000000); #define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \ - u32 b = cpu->R[cpu->CurInstr&0xF]; \ + u32 b = cpu->GetReg(cpu->CurInstr&0xF); \ u32 s = (cpu->CurInstr>>7)&0x1F; \ shiftop(b, s); #define A_CALC_OP2_REG_SHIFT_REG(shiftop) \ - u32 b = cpu->R[cpu->CurInstr&0xF]; \ + u32 b = cpu->GetReg(cpu->CurInstr&0xF); \ if ((cpu->CurInstr&0xF)==15) b += 4; \ - shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF)); + shiftop(b, (cpu->GetReg((cpu->CurInstr>>8)&0xF) & 0xFF)); #define A_IMPLEMENT_ALU_OP(x,s) \ @@ -313,7 +313,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ #define A_AND(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a & b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -326,7 +326,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ } #define A_AND_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -344,7 +344,7 @@ A_IMPLEMENT_ALU_OP(AND,_S) #define A_EOR(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a ^ b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -357,7 +357,7 @@ A_IMPLEMENT_ALU_OP(AND,_S) } #define A_EOR_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -375,7 +375,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) #define A_SUB(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a - b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -388,7 +388,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) } #define A_SUB_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a - b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -408,7 +408,7 @@ A_IMPLEMENT_ALU_OP(SUB,) #define A_RSB(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = b - a; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -421,7 +421,7 @@ A_IMPLEMENT_ALU_OP(SUB,) } #define A_RSB_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = b - a; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -441,7 +441,7 @@ A_IMPLEMENT_ALU_OP(RSB,) #define A_ADD(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a + b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -454,7 +454,7 @@ A_IMPLEMENT_ALU_OP(RSB,) } #define A_ADD_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a + b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -474,7 +474,7 @@ A_IMPLEMENT_ALU_OP(ADD,) #define A_ADC(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -487,7 +487,7 @@ A_IMPLEMENT_ALU_OP(ADD,) } #define A_ADC_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res_tmp = a + b; \ u32 carry = (cpu->CPSR&0x20000000 ? 1:0); \ u32 res = res_tmp + carry; \ @@ -509,7 +509,7 @@ A_IMPLEMENT_ALU_OP(ADC,) #define A_SBC(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -522,7 +522,7 @@ A_IMPLEMENT_ALU_OP(ADC,) } #define A_SBC_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res_tmp = a - b; \ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = res_tmp - carry; \ @@ -544,7 +544,7 @@ A_IMPLEMENT_ALU_OP(SBC,) #define A_RSC(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -557,7 +557,7 @@ A_IMPLEMENT_ALU_OP(SBC,) } #define A_RSC_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res_tmp = b - a; \ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = res_tmp - carry; \ @@ -579,7 +579,7 @@ A_IMPLEMENT_ALU_OP(RSC,) #define A_TST(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -589,7 +589,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S) #define A_TEQ(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -599,7 +599,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) #define A_CMP(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a - b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -611,7 +611,7 @@ A_IMPLEMENT_ALU_TEST(CMP,) #define A_CMN(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a + b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -623,7 +623,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) #define A_ORR(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a | b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -636,7 +636,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) } #define A_ORR_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a | b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -699,7 +699,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) #define A_BIC(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a & ~b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -712,7 +712,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) } #define A_BIC_S(c) \ - u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 res = a & ~b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -761,8 +761,8 @@ A_IMPLEMENT_ALU_OP(MVN,_S) void A_MUL(ARM* cpu) { - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF); u32 res = rm * rs; @@ -791,9 +791,9 @@ void A_MUL(ARM* cpu) void A_MLA(ARM* cpu) { - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; - u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF); + u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF); u32 res = (rm * rs) + rn; @@ -822,8 +822,8 @@ void A_MLA(ARM* cpu) void A_UMULL(ARM* cpu) { - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); u64 res = (u64)rm * (u64)rs; @@ -848,17 +848,17 @@ void A_UMULL(ARM* cpu) !res); if (cpu->Num==1) cpu->SetC(0); } - else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions + else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_UMLAL(ARM* cpu) { - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); u64 res = (u64)rm * (u64)rs; - u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); + u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); // CHECKME: INTERLOCK? res += rd; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; @@ -887,8 +887,8 @@ void A_UMLAL(ARM* cpu) void A_SMULL(ARM* cpu) { - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); s64 res = (s64)(s32)rm * (s64)(s32)rs; @@ -913,17 +913,17 @@ void A_SMULL(ARM* cpu) !res); if (cpu->Num==1) cpu->SetC(0); } - else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions + else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMLAL(ARM* cpu) { - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); s64 res = (s64)(s32)rm * (s64)(s32)rs; - s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); + s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); // CHECKME: INTERLOCK? res += rd; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; @@ -947,16 +947,16 @@ void A_SMLAL(ARM* cpu) !res); if (cpu->Num==1) cpu->SetC(0); } - else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions + else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMLAxy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; - u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1); if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; @@ -978,9 +978,9 @@ void A_SMLAWy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; - u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1); if (cpu->CurInstr & (1<<6)) rs >>= 16; else rs &= 0xFFFF; @@ -1000,8 +1000,8 @@ void A_SMULxy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; @@ -1019,8 +1019,8 @@ void A_SMULWy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); if (cpu->CurInstr & (1<<6)) rs >>= 16; else rs &= 0xFFFF; @@ -1036,8 +1036,8 @@ void A_SMLALxy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 0); + u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 0); // yeah this one actually doesn't need two interlock cycles to interlock if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; @@ -1053,7 +1053,7 @@ void A_SMLALxy(ARM* cpu) cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); cpu->AddCycles_CI(1); - cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); + cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); } @@ -1062,7 +1062,7 @@ void A_CLZ(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 val = cpu->R[cpu->CurInstr & 0xF]; + u32 val = cpu->GetReg(cpu->CurInstr & 0xF, 1); u32 res = 0; while ((val & 0xFF000000) == 0) @@ -1086,8 +1086,8 @@ void A_QADD(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); u32 res = rm + rn; if (OverflowAdd(rm, rn)) @@ -1105,8 +1105,8 @@ void A_QSUB(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); u32 res = rm - rn; if (OverflowSub(rm, rn)) @@ -1124,8 +1124,8 @@ void A_QDADD(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); if (OverflowAdd(rn, rn)) { @@ -1151,8 +1151,8 @@ void A_QDSUB(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); if (OverflowAdd(rn, rn)) { diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 4e93c749..a11e912d 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -53,7 +53,7 @@ namespace melonDS::ARMInterpreter if (!(cpu->CurInstr & (1<<23))) offset = -offset; #define A_WB_CALC_OFFSET_REG(shiftop) \ - u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ + u32 offset = cpu->GetReg(cpu->CurInstr & 0xF); \ u32 shift = ((cpu->CurInstr>>7)&0x1F); \ shiftop(offset, shift); \ if (!(cpu->CurInstr & (1<<23))) offset = -offset; @@ -61,8 +61,8 @@ namespace melonDS::ARMInterpreter #define A_STR \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(offset, storeval); \ @@ -72,8 +72,8 @@ namespace melonDS::ARMInterpreter // TODO: user mode (bit21) #define A_STR_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(addr, storeval); \ @@ -82,8 +82,8 @@ namespace melonDS::ARMInterpreter cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRB \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(offset, storeval); \ cpu->AddCycles_CD(); \ @@ -92,8 +92,8 @@ namespace melonDS::ARMInterpreter // TODO: user mode (bit21) #define A_STRB_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(addr, storeval); \ cpu->AddCycles_CD(); \ @@ -101,7 +101,7 @@ namespace melonDS::ARMInterpreter cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDR \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead32(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -120,7 +120,7 @@ namespace melonDS::ARMInterpreter // TODO: user mode #define A_LDR_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -138,7 +138,7 @@ namespace melonDS::ARMInterpreter } #define A_LDRB \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -153,7 +153,7 @@ namespace melonDS::ARMInterpreter // TODO: user mode #define A_LDRB_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -242,14 +242,14 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (!(cpu->CurInstr & (1<<23))) offset = -offset; #define A_HD_CALC_OFFSET_REG \ - u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ + u32 offset = cpu->GetReg(cpu->CurInstr & 0xF); \ if (!(cpu->CurInstr & (1<<23))) offset = -offset; #define A_STRH \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(offset, storeval); \ cpu->AddCycles_CD(); \ @@ -257,8 +257,8 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_STRH_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(addr, storeval); \ cpu->AddCycles_CD(); \ @@ -269,7 +269,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRD \ if (cpu->Num != 0) return; \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ if (!cpu->DataRead32 (offset, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \ @@ -287,7 +287,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRD_POST \ if (cpu->Num != 0) return; \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ if (!cpu->DataRead32 (addr, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \ @@ -305,11 +305,11 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRD \ if (cpu->Num != 0) return; \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ - u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ + bool dataabort = !cpu->DataWrite32(offset, cpu->GetReg(r)); /* yes, this data abort behavior is on purpose */ \ + u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (offset+4, storeval, dataabort); /* no, i dont understand it either */ \ cpu->AddCycles_CD(); \ if (dataabort) return; \ @@ -317,18 +317,18 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRD_POST \ if (cpu->Num != 0) return; \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(addr, cpu->R[r]); \ - u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ + bool dataabort = !cpu->DataWrite32(addr, cpu->GetReg(r)); \ + u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (addr+4, storeval, dataabort); \ cpu->AddCycles_CD(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -342,7 +342,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRH_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -356,7 +356,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSB \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -371,7 +371,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSB_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -386,7 +386,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSH \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -401,7 +401,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSH_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI(); \ if (dataabort) return; \ @@ -452,8 +452,8 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH) void A_SWP(ARM* cpu) { - u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; - u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 base = cpu->GetReg((cpu->CurInstr >> 16) & 0xF); if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; @@ -468,9 +468,18 @@ void A_SWP(ARM* cpu) if (rd != 15) { cpu->R[rd] = ROR(val, 8*(base&0x3)); - cpu->SetCycles_L(rd, 1, cpu->ILT_Norm); // TODO: it adds an extra interlock cycle when doing a misaligned load from a non-itcm address + + u32 cycles; + if (base & 3) // add an extra interlock cycle when doing a misaligned load from a non-itcm address (checkme: does it matter whether you're executing from there?) + { + if (cpu->Num == 1) cycles = 2; // checkme + else cycles = ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2; + } + else cycles = 1; + + cpu->SetCycles_L(rd, cycles, cpu->ILT_Norm); } - else if (cpu->Num==1) // for some reason these jumps don't work on the arm 9? + else if (cpu->Num == 1) // for some reason these jumps don't work on the arm 9? cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1, cpu->ILT_Norm); } else cpu->AddCycles_CDI(); @@ -481,8 +490,8 @@ void A_SWP(ARM* cpu) void A_SWPB(ARM* cpu) { - u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; - u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF; + u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1) & 0xFF; + u32 base = cpu->GetReg((cpu->CurInstr >> 16) & 0xF); if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; @@ -497,9 +506,15 @@ void A_SWPB(ARM* cpu) if (rd != 15) { cpu->R[rd] = val; - cpu->SetCycles_L(rd, 1, cpu->ILT_Norm); // TODO: it adds an extra interlock cycle when doing a load from a non-itcm address + + // add an extra interlock cycle when doing a load from a non-itcm address (checkme: does it matter whether you're executing from there?) + u32 cycles; + if (cpu->Num == 1) cycles = 2; // checkme + else cycles = ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2; + + cpu->SetCycles_L(rd, cycles, cpu->ILT_Norm); } - else if (cpu->Num==1)// for some reason these jumps don't work on the arm 9? + else if (cpu->Num == 1)// for some reason these jumps don't work on the arm 9? cpu->JumpTo(val & ~1); } else cpu->AddCycles_CDI(); @@ -513,12 +528,12 @@ void A_SWPB(ARM* cpu) void A_LDM(ARM* cpu) { u32 baseid = (cpu->CurInstr >> 16) & 0xF; - u32 base = cpu->R[baseid]; + u32 base = cpu->GetReg(baseid, 1); u32 wbbase; u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; - u8 lastreg = 0; // TODO: this doesn't support 0 reg LDMs (do those even work?) + u32 lastreg = 0; // TODO: this doesn't support 0 reg LDMs (do those even work?) if (!(cpu->CurInstr & (1<<23))) // decrement { @@ -554,8 +569,8 @@ void A_LDM(ARM* cpu) } first = false; - if (!preinc) base += 4; lastreg = i; + if (!preinc) base += 4; } } @@ -578,7 +593,12 @@ void A_LDM(ARM* cpu) else { cpu->AddCycles_CDI(); - cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); // TODO: THIS DOESN'T APPLY WHEN LOADING FROM ITCM + + u32 lastbase = base; + if (!preinc) lastbase -= 4; + // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) + if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) + cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); } // switch back to previous regs @@ -628,7 +648,7 @@ void A_LDM(ARM* cpu) void A_STM(ARM* cpu) { u32 baseid = (cpu->CurInstr >> 16) & 0xF; - u32 base = cpu->R[baseid]; + u32 base = cpu->GetReg(baseid, 1); u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; @@ -672,7 +692,7 @@ void A_STM(ARM* cpu) val = oldbase; else val = base; } - else val = cpu->R[i]; + else val = cpu->GetReg(i, 1+cpu->DataCycles); if (i == 15) val+=4; From 449557624d3577f26c39520c761ca69d6e297ce4 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 15 Jun 2024 18:37:31 -0400 Subject: [PATCH 034/115] don't do interlocks for the arm7 --- src/ARM.h | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/ARM.h b/src/ARM.h index ff857db9..739c704f 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -166,23 +166,10 @@ public: } // fetch the value of a register while handling any interlock cycles - inline u32 GetReg(const u32 reg, const u32 delay = 0) - { -#ifdef INTERLOCK - if (InterlockTimestamp[reg] > (Timestamp() + delay)) - Timestamp() = InterlockTimestamp[reg] - delay; -#endif - return R[reg]; - } + virtual inline u32 GetReg(const u32 reg, const u32 delay = 0) = 0; // Must be called after all of an instruction's cycles are calculated!!! - inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) - { -#ifdef INTERLOCK - InterlockTimestamp[reg] = cycles + Timestamp() + Cycles; - //InterlockType[reg] = type; -#endif - } + virtual inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) = 0; virtual u64& Timestamp() = 0; @@ -351,6 +338,25 @@ public: // Cycles += numC + numD; } + // fetch the value of a register while handling any interlock cycles + inline u32 GetReg(const u32 reg, const u32 delay = 0) override + { +#ifdef INTERLOCK + if (InterlockTimestamp[reg] > (Timestamp() + delay)) + Timestamp() = InterlockTimestamp[reg] - delay; +#endif + return R[reg]; + } + + // Must be called after all of an instruction's cycles are calculated!!! + inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override + { +#ifdef INTERLOCK + InterlockTimestamp[reg] = cycles + Timestamp() + Cycles; + //InterlockType[reg] = type; +#endif + } + u64& Timestamp() override; void GetCodeMemRegion(u32 addr, MemRegion* region); @@ -468,6 +474,15 @@ public: void AddCycles_CDI() override; void AddCycles_CD() override; + // fetch the value of a register while handling any interlock cycles + inline u32 GetReg(const u32 reg, const u32 delay = 0) override + { + return R[reg]; + } + + // Must be called after all of an instruction's cycles are calculated!!! + inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override{} + u64& Timestamp() override; protected: u8 BusRead8(u32 addr) override; From debaaa0425a921817fc1701bca8f645f8248fd76 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 15 Jun 2024 18:47:56 -0400 Subject: [PATCH 035/115] fix performance regression for disabling interlock emulation path --- src/ARM.h | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/ARM.h b/src/ARM.h index 739c704f..e5d82ddf 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -145,6 +145,7 @@ public: virtual void AddCycles_CDI() = 0; virtual void AddCycles_CD() = 0; +/* inline void AddCycles_L(const u32 delay, const u32 reg1) { if (InterlockTimestamp[reg1] > Timestamp() + delay); @@ -163,13 +164,24 @@ public: u64 cycles = std::max(InterlockTimestamp[reg1], std::max(InterlockTimestamp[reg2], InterlockTimestamp[reg3])); if (cycles > Timestamp() + delay) Timestamp() = cycles; - } - + }*/ + +#ifdef INTERLOCK // fetch the value of a register while handling any interlock cycles virtual inline u32 GetReg(const u32 reg, const u32 delay = 0) = 0; // Must be called after all of an instruction's cycles are calculated!!! virtual inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) = 0; +#else + // fetch the value of a register while handling any interlock cycles + inline u32 GetReg(const u32 reg, const u32 delay = 0) + { + return R[reg]; + } + + // Must be called after all of an instruction's cycles are calculated!!! + inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) {} +#endif virtual u64& Timestamp() = 0; @@ -337,25 +349,23 @@ public: //else // Cycles += numC + numD; } - + +#ifdef INTERLOCK // fetch the value of a register while handling any interlock cycles inline u32 GetReg(const u32 reg, const u32 delay = 0) override { -#ifdef INTERLOCK if (InterlockTimestamp[reg] > (Timestamp() + delay)) Timestamp() = InterlockTimestamp[reg] - delay; -#endif return R[reg]; } // Must be called after all of an instruction's cycles are calculated!!! inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override { -#ifdef INTERLOCK InterlockTimestamp[reg] = cycles + Timestamp() + Cycles; //InterlockType[reg] = type; -#endif } +#endif u64& Timestamp() override; @@ -474,6 +484,7 @@ public: void AddCycles_CDI() override; void AddCycles_CD() override; +#ifdef INTERLOCK // fetch the value of a register while handling any interlock cycles inline u32 GetReg(const u32 reg, const u32 delay = 0) override { @@ -482,6 +493,7 @@ public: // Must be called after all of an instruction's cycles are calculated!!! inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override{} +#endif u64& Timestamp() override; protected: From 5b37ca70d153a67a988a2f4c35ebd271157410fc Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 16 Jun 2024 20:44:55 -0400 Subject: [PATCH 036/115] implement correct/guess interlocks for remaining instructions --- src/ARMInterpreter_ALU.cpp | 130 +++++++++++++------------- src/ARMInterpreter_Branch.cpp | 16 ++-- src/ARMInterpreter_LoadStore.cpp | 155 +++++++++++++++++++------------ 3 files changed, 168 insertions(+), 133 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index ac18872b..17afa833 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -1180,9 +1180,9 @@ void A_QDSUB(ARM* cpu) -void T_LSL_IMM(ARM* cpu) +void T_LSL_IMM(ARM* cpu) // verify interlock { - u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 s = (cpu->CurInstr >> 6) & 0x1F; LSL_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; @@ -1191,9 +1191,9 @@ void T_LSL_IMM(ARM* cpu) cpu->AddCycles_C(); } -void T_LSR_IMM(ARM* cpu) +void T_LSR_IMM(ARM* cpu) // verify interlock { - u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 s = (cpu->CurInstr >> 6) & 0x1F; LSR_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; @@ -1202,9 +1202,9 @@ void T_LSR_IMM(ARM* cpu) cpu->AddCycles_C(); } -void T_ASR_IMM(ARM* cpu) +void T_ASR_IMM(ARM* cpu) // verify interlock { - u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 s = (cpu->CurInstr >> 6) & 0x1F; ASR_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; @@ -1215,8 +1215,8 @@ void T_ASR_IMM(ARM* cpu) void T_ADD_REG_(ARM* cpu) { - u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; + u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 6) & 0x7); u32 res = a + b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, @@ -1228,8 +1228,8 @@ void T_ADD_REG_(ARM* cpu) void T_SUB_REG_(ARM* cpu) { - u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; + u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 6) & 0x7); u32 res = a - b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, @@ -1239,9 +1239,9 @@ void T_SUB_REG_(ARM* cpu) cpu->AddCycles_C(); } -void T_ADD_IMM_(ARM* cpu) +void T_ADD_IMM_(ARM* cpu) // verify interlock { - u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 b = (cpu->CurInstr >> 6) & 0x7; u32 res = a + b; cpu->R[cpu->CurInstr & 0x7] = res; @@ -1252,9 +1252,9 @@ void T_ADD_IMM_(ARM* cpu) cpu->AddCycles_C(); } -void T_SUB_IMM_(ARM* cpu) +void T_SUB_IMM_(ARM* cpu) // verify interlock { - u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 b = (cpu->CurInstr >> 6) & 0x7; u32 res = a - b; cpu->R[cpu->CurInstr & 0x7] = res; @@ -1265,7 +1265,7 @@ void T_SUB_IMM_(ARM* cpu) cpu->AddCycles_C(); } -void T_MOV_IMM(ARM* cpu) +void T_MOV_IMM(ARM* cpu) // verify interlock { u32 b = cpu->CurInstr & 0xFF; cpu->R[(cpu->CurInstr >> 8) & 0x7] = b; @@ -1274,7 +1274,7 @@ void T_MOV_IMM(ARM* cpu) cpu->AddCycles_C(); } -void T_CMP_IMM(ARM* cpu) +void T_CMP_IMM(ARM* cpu) // verify interlock { u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; @@ -1286,9 +1286,9 @@ void T_CMP_IMM(ARM* cpu) cpu->AddCycles_C(); } -void T_ADD_IMM(ARM* cpu) +void T_ADD_IMM(ARM* cpu) // verify interlock { - u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); u32 b = cpu->CurInstr & 0xFF; u32 res = a + b; cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; @@ -1299,9 +1299,9 @@ void T_ADD_IMM(ARM* cpu) cpu->AddCycles_C(); } -void T_SUB_IMM(ARM* cpu) +void T_SUB_IMM(ARM* cpu) // verify interlock { - u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); u32 b = cpu->CurInstr & 0xFF; u32 res = a - b; cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; @@ -1315,8 +1315,8 @@ void T_SUB_IMM(ARM* cpu) void T_AND_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = a & b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1326,8 +1326,8 @@ void T_AND_REG(ARM* cpu) void T_EOR_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = a ^ b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1337,8 +1337,8 @@ void T_EOR_REG(ARM* cpu) void T_LSL_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; LSL_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1348,8 +1348,8 @@ void T_LSL_REG(ARM* cpu) void T_LSR_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; LSR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1359,8 +1359,8 @@ void T_LSR_REG(ARM* cpu) void T_ASR_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; ASR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1370,8 +1370,8 @@ void T_ASR_REG(ARM* cpu) void T_ADC_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res_tmp = a + b; u32 carry = (cpu->CPSR&0x20000000 ? 1:0); u32 res = res_tmp + carry; @@ -1385,8 +1385,8 @@ void T_ADC_REG(ARM* cpu) void T_SBC_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res_tmp = a - b; u32 carry = (cpu->CPSR&0x20000000 ? 0:1); u32 res = res_tmp - carry; @@ -1400,8 +1400,8 @@ void T_SBC_REG(ARM* cpu) void T_ROR_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; ROR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1411,8 +1411,8 @@ void T_ROR_REG(ARM* cpu) void T_TST_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = a & b; cpu->SetNZ(res & 0x80000000, !res); @@ -1421,7 +1421,7 @@ void T_TST_REG(ARM* cpu) void T_NEG_REG(ARM* cpu) { - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = -b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, @@ -1433,8 +1433,8 @@ void T_NEG_REG(ARM* cpu) void T_CMP_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = a - b; cpu->SetNZCV(res & 0x80000000, !res, @@ -1445,8 +1445,8 @@ void T_CMP_REG(ARM* cpu) void T_CMN_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = a + b; cpu->SetNZCV(res & 0x80000000, !res, @@ -1457,8 +1457,8 @@ void T_CMN_REG(ARM* cpu) void T_ORR_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = a | b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1468,8 +1468,8 @@ void T_ORR_REG(ARM* cpu) void T_MUL_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = a * b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1493,8 +1493,8 @@ void T_MUL_REG(ARM* cpu) void T_BIC_REG(ARM* cpu) { - u32 a = cpu->R[cpu->CurInstr & 0x7]; - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 a = cpu->GetReg(cpu->CurInstr & 0x7); + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = a & ~b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1504,7 +1504,7 @@ void T_BIC_REG(ARM* cpu) void T_MVN_REG(ARM* cpu) { - u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 res = ~b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1516,13 +1516,13 @@ void T_MVN_REG(ARM* cpu) // TODO: check those when MSBs and MSBd are cleared // GBAtek says it's not allowed, but it works atleast on the ARM9 -void T_ADD_HIREG(ARM* cpu) +void T_ADD_HIREG(ARM* cpu) // verify interlock { u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; - u32 a = cpu->R[rd]; - u32 b = cpu->R[rs]; + u32 a = cpu->GetReg(rd); + u32 b = cpu->GetReg(rs); cpu->AddCycles_C(); @@ -1536,13 +1536,13 @@ void T_ADD_HIREG(ARM* cpu) } } -void T_CMP_HIREG(ARM* cpu) +void T_CMP_HIREG(ARM* cpu) // verify interlock { u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; - u32 a = cpu->R[rd]; - u32 b = cpu->R[rs]; + u32 a = cpu->GetReg(rd); + u32 b = cpu->GetReg(rs); u32 res = a - b; cpu->SetNZCV(res & 0x80000000, @@ -1552,7 +1552,7 @@ void T_CMP_HIREG(ARM* cpu) cpu->AddCycles_C(); } -void T_MOV_HIREG(ARM* cpu) +void T_MOV_HIREG(ARM* cpu) // verify interlock { u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; @@ -1561,11 +1561,11 @@ void T_MOV_HIREG(ARM* cpu) if (rd == 15) { - cpu->JumpTo(cpu->R[rs] | 1); + cpu->JumpTo(cpu->GetReg(rs) | 1); } else { - cpu->R[rd] = cpu->R[rs]; + cpu->R[rd] = cpu->GetReg(rs); } // nocash-style debugging hook @@ -1582,25 +1582,25 @@ void T_MOV_HIREG(ARM* cpu) } -void T_ADD_PCREL(ARM* cpu) +void T_ADD_PCREL(ARM* cpu) // verify interlock { - u32 val = cpu->R[15] & ~2; + u32 val = cpu->GetReg(15) & ~2; val += ((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; cpu->AddCycles_C(); } -void T_ADD_SPREL(ARM* cpu) +void T_ADD_SPREL(ARM* cpu) // verify interlock { - u32 val = cpu->R[13]; + u32 val = cpu->GetReg(13); val += ((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; cpu->AddCycles_C(); } -void T_ADD_SP(ARM* cpu) +void T_ADD_SP(ARM* cpu) // verify interlock { - u32 val = cpu->R[13]; + u32 val = cpu->GetReg(13); if (cpu->CurInstr & (1<<7)) val -= ((cpu->CurInstr & 0x7F) << 2); else diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp index 015f5682..45f0440d 100644 --- a/src/ARMInterpreter_Branch.cpp +++ b/src/ARMInterpreter_Branch.cpp @@ -46,15 +46,15 @@ void A_BLX_IMM(ARM* cpu) cpu->JumpTo(cpu->R[15] + offset + 1); } -void A_BX(ARM* cpu) +void A_BX(ARM* cpu) // verify interlock { - cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); + cpu->JumpTo(cpu->GetReg(cpu->CurInstr & 0xF)); } -void A_BLX_REG(ARM* cpu) +void A_BLX_REG(ARM* cpu) // verify interlock { u32 lr = cpu->R[15] - 4; - cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); + cpu->JumpTo(cpu->GetReg(cpu->CurInstr & 0xF)); cpu->R[14] = lr; } @@ -71,12 +71,12 @@ void T_BCOND(ARM* cpu) cpu->AddCycles_C(); } -void T_BX(ARM* cpu) +void T_BX(ARM* cpu) // verify interlock { - cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); + cpu->JumpTo(cpu->GetReg((cpu->CurInstr >> 3) & 0xF)); } -void T_BLX_REG(ARM* cpu) +void T_BLX_REG(ARM* cpu) // verify interlock { if (cpu->Num==1) { @@ -85,7 +85,7 @@ void T_BLX_REG(ARM* cpu) } u32 lr = cpu->R[15] - 1; - cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); + cpu->JumpTo(cpu->GetReg((cpu->CurInstr >> 3) & 0xF)); cpu->R[14] = lr; } diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index a11e912d..c25896ea 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -469,15 +469,17 @@ void A_SWP(ARM* cpu) { cpu->R[rd] = ROR(val, 8*(base&0x3)); - u32 cycles; - if (base & 3) // add an extra interlock cycle when doing a misaligned load from a non-itcm address (checkme: does it matter whether you're executing from there?) + if (cpu->Num == 0) { - if (cpu->Num == 1) cycles = 2; // checkme - else cycles = ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2; - } - else cycles = 1; + u32 cycles; + if (base & 3) // add an extra interlock cycle when doing a misaligned load from a non-itcm address (checkme: does it matter whether you're executing from there?) + { + cycles = ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2; + } + else cycles = 1; - cpu->SetCycles_L(rd, cycles, cpu->ILT_Norm); + cpu->SetCycles_L(rd, cycles, cpu->ILT_Norm); + } } else if (cpu->Num == 1) // for some reason these jumps don't work on the arm 9? cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1, cpu->ILT_Norm); @@ -508,11 +510,8 @@ void A_SWPB(ARM* cpu) cpu->R[rd] = val; // add an extra interlock cycle when doing a load from a non-itcm address (checkme: does it matter whether you're executing from there?) - u32 cycles; - if (cpu->Num == 1) cycles = 2; // checkme - else cycles = ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2; - - cpu->SetCycles_L(rd, cycles, cpu->ILT_Norm); + if (cpu->Num == 0) + cpu->SetCycles_L(rd, ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2, cpu->ILT_Norm); } else if (cpu->Num == 1)// for some reason these jumps don't work on the arm 9? cpu->JumpTo(val & ~1); @@ -594,11 +593,14 @@ void A_LDM(ARM* cpu) { cpu->AddCycles_CDI(); - u32 lastbase = base; - if (!preinc) lastbase -= 4; - // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) - if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) - cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); + if (cpu->Num == 0) + { + u32 lastbase = base; + if (!preinc) lastbase -= 4; + // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) + if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) + cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); + } } // switch back to previous regs @@ -736,160 +738,170 @@ void A_STM(ARM* cpu) -void T_LDR_PCREL(ARM* cpu) +void T_LDR_PCREL(ARM* cpu) // verify interlock { - u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); + u32 addr = (cpu->GetReg(15) & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[(cpu->CurInstr >> 8) & 0x7], 1, cpu->ILT_Norm); // checkme? ROR? } -void T_STR_REG(ARM* cpu) +void T_STR_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7]); + u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + cpu->DataWrite32(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); cpu->AddCycles_CD(); } void T_STRB_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7]); + u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + cpu->DataWrite8(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); cpu->AddCycles_CD(); } void T_LDR_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); u32 val; if (cpu->DataRead32(addr, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], (addr & 3) ? 2 : 1, cpu->ILT_Norm); } void T_LDRB_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); } void T_STRH_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7]); + u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + cpu->DataWrite16(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); cpu->AddCycles_CD(); } void T_LDRSB_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); if (cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); } void T_LDRH_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); } void T_LDRSH_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); if (cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); } -void T_STR_IMM(ARM* cpu) +void T_STR_IMM(ARM* cpu) // verify interlock { u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); - cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]); + cpu->DataWrite32(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); cpu->AddCycles_CD(); } -void T_LDR_IMM(ARM* cpu) +void T_LDR_IMM(ARM* cpu) // verify interlock { u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 val; if (cpu->DataRead32(offset, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], (offset & 3) ? 2 : 1, cpu->ILT_Norm); } -void T_STRB_IMM(ARM* cpu) +void T_STRB_IMM(ARM* cpu) // verify interlock { u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); - cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7]); + cpu->DataWrite8(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); cpu->AddCycles_CD(); } -void T_LDRB_IMM(ARM* cpu) +void T_LDRB_IMM(ARM* cpu) // verify interlock { u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); } -void T_STRH_IMM(ARM* cpu) +void T_STRH_IMM(ARM* cpu) // verify interlock { u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); - cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7]); + cpu->DataWrite16(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); cpu->AddCycles_CD(); } -void T_LDRH_IMM(ARM* cpu) +void T_LDRH_IMM(ARM* cpu) // verify interlock { u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; + offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); } -void T_STR_SPREL(ARM* cpu) +void T_STR_SPREL(ARM* cpu) // verify interlock { u32 offset = (cpu->CurInstr << 2) & 0x3FC; - offset += cpu->R[13]; + offset += cpu->GetReg(13); - cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7]); + cpu->DataWrite32(offset, cpu->GetReg((cpu->CurInstr >> 8) & 0x7, 1)); cpu->AddCycles_CD(); } -void T_LDR_SPREL(ARM* cpu) +void T_LDR_SPREL(ARM* cpu) // verify interlock { u32 offset = (cpu->CurInstr << 2) & 0x3FC; - offset += cpu->R[13]; + offset += cpu->GetReg(13); cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); + cpu->SetCycles_L(cpu->R[(cpu->CurInstr >> 8) & 0x7], 1, cpu->ILT_Norm); // checkme? ROR? } @@ -907,7 +919,7 @@ void T_PUSH(ARM* cpu) if (cpu->CurInstr & (1<<8)) nregs++; - u32 base = cpu->R[13]; + u32 base = cpu->GetReg(13); base -= (nregs<<2); u32 wbbase = base; @@ -915,8 +927,8 @@ void T_PUSH(ARM* cpu) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]) - : cpu->DataWrite32S(base, cpu->R[i]))) + if (!(first ? cpu->DataWrite32 (base, cpu->GetReg(i, 1)) + : cpu->DataWrite32S(base, cpu->GetReg(i, 1)))) // verify interlock { goto dataabort; } @@ -940,10 +952,11 @@ void T_PUSH(ARM* cpu) cpu->AddCycles_CD(); } -void T_POP(ARM* cpu) +void T_POP(ARM* cpu) // verify interlock { - u32 base = cpu->R[13]; + u32 base = cpu->GetReg(13); bool first = true; + u32 lastreg = 0; for (int i = 0; i < 8; i++) { @@ -974,21 +987,30 @@ void T_POP(ARM* cpu) cpu->R[13] = base; + if (cpu->Num == 0) + { + u32 lastbase = base - 4; + // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) + if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) + cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); + } + return; + dataabort: cpu->AddCycles_CDI(); } void T_STMIA(ARM* cpu) { - u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + u32 base = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); bool first = true; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]) - : cpu->DataWrite32S(base, cpu->R[i]))) + if (!(first ? cpu->DataWrite32 (base, cpu->GetReg(i, 1)) + : cpu->DataWrite32S(base, cpu->GetReg(i, 1)))) { goto dataabort; } @@ -1005,8 +1027,9 @@ void T_STMIA(ARM* cpu) void T_LDMIA(ARM* cpu) { - u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + u32 base = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); bool first = true; + u32 lastreg = 0; for (int i = 0; i < 8; i++) { @@ -1019,11 +1042,23 @@ void T_LDMIA(ARM* cpu) } first = false; base += 4; + lastreg = i; } } if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; + + + cpu->AddCycles_CDI(); + if (cpu->Num == 0) + { + u32 lastbase = base - 4; + // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) + if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) + cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); + } + return; dataabort: cpu->AddCycles_CDI(); From f00f1f6ca482758fdbd53d93e8c80c04bf6caa93 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 16 Jun 2024 20:50:42 -0400 Subject: [PATCH 037/115] im smart --- src/ARMInterpreter_LoadStore.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index c25896ea..52a80983 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -744,7 +744,7 @@ void T_LDR_PCREL(ARM* cpu) // verify interlock cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[(cpu->CurInstr >> 8) & 0x7], 1, cpu->ILT_Norm); // checkme? ROR? + cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme? ROR? } @@ -773,7 +773,7 @@ void T_LDR_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], (addr & 3) ? 2 : 1, cpu->ILT_Norm); + cpu->SetCycles_L(cpu->CurInstr & 0x7, (addr & 3) ? 2 : 1, cpu->ILT_Norm); } void T_LDRB_REG(ARM* cpu) @@ -782,7 +782,7 @@ void T_LDRB_REG(ARM* cpu) cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); + cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -801,7 +801,7 @@ void T_LDRSB_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); + cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_LDRH_REG(ARM* cpu) @@ -810,7 +810,7 @@ void T_LDRH_REG(ARM* cpu) cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); + cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_LDRSH_REG(ARM* cpu) @@ -820,7 +820,7 @@ void T_LDRSH_REG(ARM* cpu) cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); + cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -842,7 +842,7 @@ void T_LDR_IMM(ARM* cpu) // verify interlock if (cpu->DataRead32(offset, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], (offset & 3) ? 2 : 1, cpu->ILT_Norm); + cpu->SetCycles_L(cpu->CurInstr & 0x7, (offset & 3) ? 2 : 1, cpu->ILT_Norm); } void T_STRB_IMM(ARM* cpu) // verify interlock @@ -861,7 +861,7 @@ void T_LDRB_IMM(ARM* cpu) // verify interlock cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); + cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -881,7 +881,7 @@ void T_LDRH_IMM(ARM* cpu) // verify interlock cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[cpu->CurInstr & 0x7], 2, cpu->ILT_Norm); + cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -901,7 +901,7 @@ void T_LDR_SPREL(ARM* cpu) // verify interlock cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); - cpu->SetCycles_L(cpu->R[(cpu->CurInstr >> 8) & 0x7], 1, cpu->ILT_Norm); // checkme? ROR? + cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme? ROR? } From a9e2c7e047eb62f56d121bf12b82703bb7da07d9 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 16 Jun 2024 23:24:20 -0400 Subject: [PATCH 038/115] implement two regs i missed --- src/ARMInterpreter_ALU.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 17afa833..92c027f3 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -858,7 +858,7 @@ void A_UMLAL(ARM* cpu) u64 res = (u64)rm * (u64)rs; - u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); // CHECKME: INTERLOCK? + u64 rd = (u64)cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1) | ((u64)cpu->GetReg((cpu->CurInstr >> 16) & 0xF) << 32ULL); res += rd; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; @@ -923,7 +923,7 @@ void A_SMLAL(ARM* cpu) s64 res = (s64)(s32)rm * (s64)(s32)rs; - s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); // CHECKME: INTERLOCK? + s64 rd = (s64)((u64)cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1) | ((u64)cpu->GetReg((cpu->CurInstr >> 16) & 0xF) << 32ULL)); res += rd; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; From c5258d6377f72053be136254a92b19c96065167e Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 17 Jun 2024 18:07:53 -0400 Subject: [PATCH 039/115] verify interlocks for alu and load/store remove some checks for interlock that im pretty sure can't trigger --- src/ARMInterpreter_ALU.cpp | 38 ++++++++++++++++---------------- src/ARMInterpreter_LoadStore.cpp | 32 +++++++++++++-------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 92c027f3..be0498e1 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -1180,7 +1180,7 @@ void A_QDSUB(ARM* cpu) -void T_LSL_IMM(ARM* cpu) // verify interlock +void T_LSL_IMM(ARM* cpu) { u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 s = (cpu->CurInstr >> 6) & 0x1F; @@ -1191,7 +1191,7 @@ void T_LSL_IMM(ARM* cpu) // verify interlock cpu->AddCycles_C(); } -void T_LSR_IMM(ARM* cpu) // verify interlock +void T_LSR_IMM(ARM* cpu) { u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 s = (cpu->CurInstr >> 6) & 0x1F; @@ -1202,7 +1202,7 @@ void T_LSR_IMM(ARM* cpu) // verify interlock cpu->AddCycles_C(); } -void T_ASR_IMM(ARM* cpu) // verify interlock +void T_ASR_IMM(ARM* cpu) { u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 s = (cpu->CurInstr >> 6) & 0x1F; @@ -1239,7 +1239,7 @@ void T_SUB_REG_(ARM* cpu) cpu->AddCycles_C(); } -void T_ADD_IMM_(ARM* cpu) // verify interlock +void T_ADD_IMM_(ARM* cpu) { u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 b = (cpu->CurInstr >> 6) & 0x7; @@ -1252,7 +1252,7 @@ void T_ADD_IMM_(ARM* cpu) // verify interlock cpu->AddCycles_C(); } -void T_SUB_IMM_(ARM* cpu) // verify interlock +void T_SUB_IMM_(ARM* cpu) { u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); u32 b = (cpu->CurInstr >> 6) & 0x7; @@ -1265,7 +1265,7 @@ void T_SUB_IMM_(ARM* cpu) // verify interlock cpu->AddCycles_C(); } -void T_MOV_IMM(ARM* cpu) // verify interlock +void T_MOV_IMM(ARM* cpu) { u32 b = cpu->CurInstr & 0xFF; cpu->R[(cpu->CurInstr >> 8) & 0x7] = b; @@ -1274,9 +1274,9 @@ void T_MOV_IMM(ARM* cpu) // verify interlock cpu->AddCycles_C(); } -void T_CMP_IMM(ARM* cpu) // verify interlock +void T_CMP_IMM(ARM* cpu) { - u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; + u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); u32 b = cpu->CurInstr & 0xFF; u32 res = a - b; cpu->SetNZCV(res & 0x80000000, @@ -1286,7 +1286,7 @@ void T_CMP_IMM(ARM* cpu) // verify interlock cpu->AddCycles_C(); } -void T_ADD_IMM(ARM* cpu) // verify interlock +void T_ADD_IMM(ARM* cpu) { u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); u32 b = cpu->CurInstr & 0xFF; @@ -1299,7 +1299,7 @@ void T_ADD_IMM(ARM* cpu) // verify interlock cpu->AddCycles_C(); } -void T_SUB_IMM(ARM* cpu) // verify interlock +void T_SUB_IMM(ARM* cpu) { u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); u32 b = cpu->CurInstr & 0xFF; @@ -1516,7 +1516,7 @@ void T_MVN_REG(ARM* cpu) // TODO: check those when MSBs and MSBd are cleared // GBAtek says it's not allowed, but it works atleast on the ARM9 -void T_ADD_HIREG(ARM* cpu) // verify interlock +void T_ADD_HIREG(ARM* cpu) { u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; @@ -1536,7 +1536,7 @@ void T_ADD_HIREG(ARM* cpu) // verify interlock } } -void T_CMP_HIREG(ARM* cpu) // verify interlock +void T_CMP_HIREG(ARM* cpu) { u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; @@ -1552,7 +1552,7 @@ void T_CMP_HIREG(ARM* cpu) // verify interlock cpu->AddCycles_C(); } -void T_MOV_HIREG(ARM* cpu) // verify interlock +void T_MOV_HIREG(ARM* cpu) { u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; @@ -1582,25 +1582,25 @@ void T_MOV_HIREG(ARM* cpu) // verify interlock } -void T_ADD_PCREL(ARM* cpu) // verify interlock +void T_ADD_PCREL(ARM* cpu) // checkme: pc shouldn't be able to interlock? { - u32 val = cpu->GetReg(15) & ~2; + u32 val = cpu->R[15] & ~2; val += ((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; cpu->AddCycles_C(); } -void T_ADD_SPREL(ARM* cpu) // verify interlock +void T_ADD_SPREL(ARM* cpu) // checkme: sp shouldn't be able to interlock in thumb? { - u32 val = cpu->GetReg(13); + u32 val = cpu->R[13]; val += ((cpu->CurInstr & 0xFF) << 2); cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; cpu->AddCycles_C(); } -void T_ADD_SP(ARM* cpu) // verify interlock +void T_ADD_SP(ARM* cpu) // checkme: sp shouldn't be able to interlock in thumb? { - u32 val = cpu->GetReg(13); + u32 val = cpu->R[13]; if (cpu->CurInstr & (1<<7)) val -= ((cpu->CurInstr & 0x7F) << 2); else diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 52a80983..3fac1963 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -738,13 +738,13 @@ void A_STM(ARM* cpu) -void T_LDR_PCREL(ARM* cpu) // verify interlock +void T_LDR_PCREL(ARM* cpu) // checkme: can pc be interlocked? { - u32 addr = (cpu->GetReg(15) & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); + u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); - cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme? ROR? + cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count } @@ -824,7 +824,7 @@ void T_LDRSH_REG(ARM* cpu) } -void T_STR_IMM(ARM* cpu) // verify interlock +void T_STR_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 4) & 0x7C; offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); @@ -833,7 +833,7 @@ void T_STR_IMM(ARM* cpu) // verify interlock cpu->AddCycles_CD(); } -void T_LDR_IMM(ARM* cpu) // verify interlock +void T_LDR_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 4) & 0x7C; offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); @@ -845,7 +845,7 @@ void T_LDR_IMM(ARM* cpu) // verify interlock cpu->SetCycles_L(cpu->CurInstr & 0x7, (offset & 3) ? 2 : 1, cpu->ILT_Norm); } -void T_STRB_IMM(ARM* cpu) // verify interlock +void T_STRB_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 6) & 0x1F; offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); @@ -854,7 +854,7 @@ void T_STRB_IMM(ARM* cpu) // verify interlock cpu->AddCycles_CD(); } -void T_LDRB_IMM(ARM* cpu) // verify interlock +void T_LDRB_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 6) & 0x1F; offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); @@ -865,7 +865,7 @@ void T_LDRB_IMM(ARM* cpu) // verify interlock } -void T_STRH_IMM(ARM* cpu) // verify interlock +void T_STRH_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 5) & 0x3E; offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); @@ -874,7 +874,7 @@ void T_STRH_IMM(ARM* cpu) // verify interlock cpu->AddCycles_CD(); } -void T_LDRH_IMM(ARM* cpu) // verify interlock +void T_LDRH_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 5) & 0x3E; offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); @@ -885,23 +885,23 @@ void T_LDRH_IMM(ARM* cpu) // verify interlock } -void T_STR_SPREL(ARM* cpu) // verify interlock +void T_STR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode? { u32 offset = (cpu->CurInstr << 2) & 0x3FC; - offset += cpu->GetReg(13); + offset += cpu->R[13]; cpu->DataWrite32(offset, cpu->GetReg((cpu->CurInstr >> 8) & 0x7, 1)); cpu->AddCycles_CD(); } -void T_LDR_SPREL(ARM* cpu) // verify interlock +void T_LDR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode? { u32 offset = (cpu->CurInstr << 2) & 0x3FC; - offset += cpu->GetReg(13); + offset += cpu->R[13]; cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); - cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme? ROR? + cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count } @@ -952,9 +952,9 @@ void T_PUSH(ARM* cpu) cpu->AddCycles_CD(); } -void T_POP(ARM* cpu) // verify interlock +void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? { - u32 base = cpu->GetReg(13); + u32 base = cpu->R[13]; bool first = true; u32 lastreg = 0; From e6ba4075b9e4d4598a8b566667c6c2ebcc984d33 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 18 Jun 2024 11:12:05 -0400 Subject: [PATCH 040/115] correct interlocked reg for umlal --- src/ARMInterpreter_ALU.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index be0498e1..44ee84aa 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -882,7 +882,7 @@ void A_UMLAL(ARM* cpu) !res); if (cpu->Num==1) cpu->SetC(0); } - else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions + else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMULL(ARM* cpu) From f1b71fe5a9886a2840d747ebf9494c7bf27e2324 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 24 Jun 2024 16:15:04 -0400 Subject: [PATCH 041/115] implement configurable vram bus width not implemented for direct boot --- src/DSi.cpp | 18 ++++++++++++++++++ src/DSi.h | 1 + 2 files changed, 19 insertions(+) diff --git a/src/DSi.cpp b/src/DSi.cpp index 306c5d1c..5b98957c 100644 --- a/src/DSi.cpp +++ b/src/DSi.cpp @@ -142,6 +142,7 @@ void DSi::Reset() SCFG_Clock9 = 0x0187; // CHECKME SCFG_Clock7 = 0x0187; SCFG_EXT[0] = 0x8307F100; + SetVRAMTimings(true); SCFG_EXT[1] = 0x93FFFB06; SCFG_MC = 0x0010 | (~((u32)(NDSCartSlot.GetCart() != nullptr))&1);//0x0011; SCFG_RST = 0; @@ -215,6 +216,7 @@ void DSi::DoSavestateExtra(Savestate* file) Set_SCFG_Clock9(SCFG_Clock9); Set_SCFG_MC(SCFG_MC); DSP.SetRstLine(SCFG_RST & 0x0001); + SetVRAMTimings(SCFG_EXT[0] & (1<<13)); MBK[0][8] = 0; MBK[1][8] = 0; @@ -693,6 +695,7 @@ void DSi::SoftReset() SCFG_Clock9 = 0x0187; // CHECKME SCFG_Clock7 = 0x0187; SCFG_EXT[0] = 0x8307F100; + SetVRAMTimings(true); SCFG_EXT[1] = 0x93FFFB06; SCFG_MC = 0x0010;//0x0011; // TODO: is this actually reset? @@ -1283,6 +1286,14 @@ void DSi::Set_SCFG_MC(u32 val) } } +void DSi::SetVRAMTimings(bool extrabuswidth) +{ + if (extrabuswidth) + SetARM9RegionTimings(0x06000, 0x07000, Mem9_VRAM, 32, 1, 1); // dsi vram + else + SetARM9RegionTimings(0x06000, 0x07000, Mem9_VRAM, 16, 1, 1); // ds vram +} + u8 DSi::ARM9Read8(u32 addr) { @@ -2521,11 +2532,18 @@ void DSi::ARM9IOWrite32(u32 addr, u32 val) u32 oldram = (SCFG_EXT[0] >> 14) & 0x3; u32 newram = (val >> 14) & 0x3; + u32 oldvram = (SCFG_EXT[0] & (1<<13)); + u32 newvram = (val & (1<<13)); + SCFG_EXT[0] &= ~0x8007F19F; SCFG_EXT[0] |= (val & 0x8007F19F); SCFG_EXT[1] &= ~0x0000F080; SCFG_EXT[1] |= (val & 0x0000F080); Log(LogLevel::Debug, "SCFG_EXT = %08X / %08X (val9 %08X)\n", SCFG_EXT[0], SCFG_EXT[1], val); + + if (oldvram != newvram) + SetVRAMTimings(newvram); + /*switch ((SCFG_EXT[0] >> 14) & 0x3) { case 0: diff --git a/src/DSi.h b/src/DSi.h index 1d010e0f..755e1f50 100644 --- a/src/DSi.h +++ b/src/DSi.h @@ -96,6 +96,7 @@ public: void MapNWRAM_B(u32 num, u8 val); void MapNWRAM_C(u32 num, u8 val); void MapNWRAMRange(u32 cpu, u32 num, u32 val); + void SetVRAMTimings(bool extrabuswidth); u8 ARM9Read8(u32 addr) override; u16 ARM9Read16(u32 addr) override; From 3583d8222fbeaaa06ebe02f2bf430ccc402d0a32 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 24 Jun 2024 16:17:04 -0400 Subject: [PATCH 042/115] disable interlock emulation, needs more research --- src/ARM.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ARM.h b/src/ARM.h index e5d82ddf..3ef0d439 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -30,7 +30,7 @@ #include "debug/GdbStub.h" #endif -#define INTERLOCK +//#define INTERLOCK namespace melonDS { From 109bbed3d0959b07c03c0bde36118f685497cb6f Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 24 Jun 2024 19:44:38 -0400 Subject: [PATCH 043/115] improve ldm timings I believe this also applies to other loads as well, but currently untested. --- src/ARM.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/ARM.h | 13 +------------ src/CP15.cpp | 19 ++++++++++-------- 3 files changed, 67 insertions(+), 20 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 899fe661..cb72dad5 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -302,6 +302,10 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) u32 oldregion = R[15] >> 24; u32 newregion = addr >> 24; + + if (addr < ITCMSize) CodeRegion = Mem9_ITCM; + else if ((addr & DTCMMask) == DTCMBase) CodeRegion = Mem9_DTCM; + else CodeRegion = NDS.ARM9Regions[addr >> 14]; RegionCodeCycles = MemTimings[addr >> 12][0]; @@ -1255,6 +1259,57 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) } +void ARMv5::AddCycles_CDI() +{ + // LDR/LDM cycles. ARM9 seems to skip the internal cycle there. + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numD = DataCycles; + + // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early + s32 early; + switch (DataRegion) + { + case 0: // background region; CHECKME + case Mem9_DTCM: + case Mem9_BIOS: + case Mem9_WRAM: + case Mem9_IO: + case Mem9_Pal: // CHECKME + default: + early = 2; + break; + + case Mem9_OAM: // CHECKME + case Mem9_GBAROM: + case Mem9_GBARAM: + early = 4; + break; + + case Mem9_MainRAM: + early = (CodeRegion == Mem9_MainRAM) ? 0 : 4; + break; + + case Mem9_VRAM: // the dsi can toggle the bus width of vram between 32 and 16 bit + early = (NDS.ConsoleType == 0 || !(((DSi&)NDS).SCFG_EXT[0] & (1<<13))) ? 4 : 2; + break; + + case Mem9_ITCM: // itcm data fetches cannot be done at the same time as a code fetch, it'll even incurr a 1 cycle penalty when executing from itcm + early = (CodeRegion == Mem9_ITCM) ? -1 : 0; + break; + } + + if (numD > early) + { + numC -= early; + if (numC < 0) numC = 0; + Cycles += numC + numD; + } + else + { + Cycles += numC; + } +} + void ARMv4::AddCycles_C() { // code only. this code fetch is sequential. diff --git a/src/ARM.h b/src/ARM.h index 3ef0d439..25a96ef2 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -325,18 +325,7 @@ public: Cycles += numC + numI; } - void AddCycles_CDI() override - { - // LDR/LDM cycles. ARM9 seems to skip the internal cycle there. - // TODO: ITCM data fetches shouldn't be parallelized, they say - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; - - //if (DataRegion != CodeRegion) - Cycles += std::max(numC + numD - 6, std::max(numC, numD)); - //else - // Cycles += numC + numD; - } + void AddCycles_CDI() override; void AddCycles_CD() override { diff --git a/src/CP15.cpp b/src/CP15.cpp index 7b11696b..319ac9c4 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -815,22 +815,23 @@ bool ARMv5::DataRead8(u32 addr, u32* val) return false; } - DataRegion = addr; - if (addr < ITCMSize) { + DataRegion = Mem9_ITCM; DataCycles = 1; *val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)]; return true; } if ((addr & DTCMMask) == DTCMBase) { + DataRegion = Mem9_DTCM; DataCycles = 1; *val = *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)]; return true; } - + *val = BusRead8(addr); + DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][1]; return true; } @@ -843,24 +844,25 @@ bool ARMv5::DataRead16(u32 addr, u32* val) return false; } - DataRegion = addr; - addr &= ~1; if (addr < ITCMSize) { + DataRegion = Mem9_ITCM; DataCycles = 1; *val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)]; return true; } if ((addr & DTCMMask) == DTCMBase) { + DataRegion = Mem9_DTCM; DataCycles = 1; *val = *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)]; return true; } - + *val = BusRead16(addr); + DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][1]; return true; } @@ -873,24 +875,25 @@ bool ARMv5::DataRead32(u32 addr, u32* val) return false; } - DataRegion = addr; - addr &= ~3; if (addr < ITCMSize) { + DataRegion = Mem9_ITCM; DataCycles = 1; *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; return true; } if ((addr & DTCMMask) == DTCMBase) { + DataRegion = Mem9_DTCM; DataCycles = 1; *val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)]; return true; } *val = BusRead32(addr); + DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][2]; return true; } From dbe00e72ddfc0fc2b342c2afbaeda13294c7763b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 24 Jun 2024 22:50:04 -0400 Subject: [PATCH 044/115] improve stm timings need to verify if they apply to all store instructions --- src/ARM.cpp | 42 ++++++++++++++++++++++++++++++------------ src/ARM.h | 12 +----------- src/CP15.cpp | 15 +++++++++------ 3 files changed, 40 insertions(+), 29 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index cb72dad5..907a4790 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1259,6 +1259,31 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) } +void ARMv5::AddCycles_CD() +{ + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numD = DataCycles; + + s32 early; + if (DataRegion == Mem9_ITCM) + { + early = (CodeRegion == Mem9_ITCM) ? -1 : 0; + } + else if (DataRegion == Mem9_DTCM) + { + early = 2; + } + else if (DataRegion == Mem9_MainRAM) + { + early = (CodeRegion == Mem9_MainRAM) ? 0 : 18; // CHECKME: how early can main ram be? + } + else early = (DataRegion == CodeRegion) ? 4 : 6; + + s32 code = numC - early; + if (code < 0) code = 0; + Cycles += std::max(code + numD, numC); +} + void ARMv5::AddCycles_CDI() { // LDR/LDM cycles. ARM9 seems to skip the internal cycle there. @@ -1269,7 +1294,7 @@ void ARMv5::AddCycles_CDI() s32 early; switch (DataRegion) { - case 0: // background region; CHECKME + case 0: // background region; case Mem9_DTCM: case Mem9_BIOS: case Mem9_WRAM: @@ -1297,17 +1322,10 @@ void ARMv5::AddCycles_CDI() early = (CodeRegion == Mem9_ITCM) ? -1 : 0; break; } - - if (numD > early) - { - numC -= early; - if (numC < 0) numC = 0; - Cycles += numC + numD; - } - else - { - Cycles += numC; - } + + s32 code = numC - early; + if (code < 0) code = 0; + Cycles += std::max(code + numD, numC); } void ARMv4::AddCycles_C() diff --git a/src/ARM.h b/src/ARM.h index 25a96ef2..68eeb685 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -327,17 +327,7 @@ public: void AddCycles_CDI() override; - void AddCycles_CD() override - { - // TODO: ITCM data fetches shouldn't be parallelized, they say - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; - - //if (DataRegion != CodeRegion) - Cycles += std::max(numC + numD - 6, std::max(numC, numD)); - //else - // Cycles += numC + numD; - } + void AddCycles_CD() override; #ifdef INTERLOCK // fetch the value of a register while handling any interlock cycles diff --git a/src/CP15.cpp b/src/CP15.cpp index 319ac9c4..06e01e83 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -934,10 +934,9 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) return false; } - DataRegion = addr; - if (addr < ITCMSize) { + DataRegion = Mem9_ITCM; DataCycles = 1; *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); @@ -945,12 +944,14 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) } if ((addr & DTCMMask) == DTCMBase) { + DataRegion = Mem9_DTCM; DataCycles = 1; *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; return true; } BusWrite8(addr, val); + DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][1]; return true; } @@ -963,12 +964,11 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) return false; } - DataRegion = addr; - addr &= ~1; if (addr < ITCMSize) { + DataRegion = Mem9_ITCM; DataCycles = 1; *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); @@ -976,12 +976,14 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) } if ((addr & DTCMMask) == DTCMBase) { + DataRegion = Mem9_DTCM; DataCycles = 1; *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; return true; } BusWrite16(addr, val); + DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][1]; return true; } @@ -994,12 +996,11 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) return false; } - DataRegion = addr; - addr &= ~3; if (addr < ITCMSize) { + DataRegion = Mem9_ITCM; DataCycles = 1; *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); @@ -1007,12 +1008,14 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) } if ((addr & DTCMMask) == DTCMBase) { + DataRegion = Mem9_DTCM; DataCycles = 1; *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; return true; } BusWrite32(addr, val); + DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][2]; return true; } From 541e1e6388537790a0bd2d8a515be3de5c52956d Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 25 Jun 2024 09:08:11 -0400 Subject: [PATCH 045/115] proper timings for ldr/str --- src/ARM.cpp | 60 +++++++++++++++-- src/ARM.h | 21 ++++-- src/ARMInterpreter_LoadStore.cpp | 111 ++++++++++++++++--------------- 3 files changed, 126 insertions(+), 66 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 907a4790..644a58a2 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1259,7 +1259,7 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) } -void ARMv5::AddCycles_CD() +void ARMv5::AddCycles_CD_STR() { s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; s32 numD = DataCycles; @@ -1267,7 +1267,7 @@ void ARMv5::AddCycles_CD() s32 early; if (DataRegion == Mem9_ITCM) { - early = (CodeRegion == Mem9_ITCM) ? -1 : 0; + early = (CodeRegion == Mem9_ITCM) ? 0 : 2; } else if (DataRegion == Mem9_DTCM) { @@ -1284,9 +1284,61 @@ void ARMv5::AddCycles_CD() Cycles += std::max(code + numD, numC); } -void ARMv5::AddCycles_CDI() +void ARMv5::AddCycles_CD_STM() { - // LDR/LDM cycles. ARM9 seems to skip the internal cycle there. + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numD = DataCycles; + + s32 early; + if (DataRegion == Mem9_ITCM) + { + early = (CodeRegion == Mem9_ITCM) ? -1 : 0; // stm adds either: no penalty or benefit to itcm loads, or a 1 cycle penalty if executing from itcm. + } + else if (DataRegion == Mem9_DTCM) + { + early = 2; + } + else if (DataRegion == Mem9_MainRAM) + { + early = (CodeRegion == Mem9_MainRAM) ? 0 : 18; // CHECKME: how early can main ram be? + } + else early = (DataRegion == CodeRegion) ? 4 : 6; + + s32 code = numC - early; + if (code < 0) code = 0; + Cycles += std::max(code + numD, numC); +} + +void ARMv5::AddCycles_CDI_LDR() +{ + // LDR cycles. ARM9 seems to skip the internal cycle here. + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numD = DataCycles; + + // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early + s32 early; + if (DataRegion == Mem9_ITCM) + { + early = (CodeRegion == Mem9_ITCM) ? 0 : 2; + } + else if (DataRegion == Mem9_DTCM) + { + early = 2; + } + else if (DataRegion == Mem9_MainRAM) + { + early = (CodeRegion == Mem9_MainRAM) ? 0 : 6; + } + else early = 6; + + s32 code = numC - early; + if (code < 0) code = 0; + Cycles += std::max(code + numD, numC); +} + +void ARMv5::AddCycles_CDI_LDM() +{ + // LDM cycles. ARM9 seems to skip the internal cycle here. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; s32 numD = DataCycles; diff --git a/src/ARM.h b/src/ARM.h index 68eeb685..38f60c6f 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -142,8 +142,10 @@ public: virtual void AddCycles_C() = 0; virtual void AddCycles_CI(s32 numI) = 0; - virtual void AddCycles_CDI() = 0; - virtual void AddCycles_CD() = 0; + virtual void AddCycles_CDI_LDR() = 0; + virtual void AddCycles_CDI_LDM() = 0; + virtual void AddCycles_CD_STR() = 0; + virtual void AddCycles_CD_STM() = 0; /* inline void AddCycles_L(const u32 delay, const u32 reg1) @@ -325,9 +327,10 @@ public: Cycles += numC + numI; } - void AddCycles_CDI() override; - - void AddCycles_CD() override; + void AddCycles_CDI_LDR() override; + void AddCycles_CDI_LDM() override; + void AddCycles_CD_STR() override; + void AddCycles_CD_STM() override; #ifdef INTERLOCK // fetch the value of a register while handling any interlock cycles @@ -460,8 +463,12 @@ public: bool DataWrite32S(u32 addr, u32 val, bool dataabort = false) override; void AddCycles_C() override; void AddCycles_CI(s32 num) override; - void AddCycles_CDI() override; - void AddCycles_CD() override; + void AddCycles_CDI(); + void AddCycles_CDI_LDR() override { AddCycles_CDI(); } + void AddCycles_CDI_LDM() override { AddCycles_CDI(); } + void AddCycles_CD(); + void AddCycles_CD_STR() override { AddCycles_CD(); } + void AddCycles_CD_STM() override { AddCycles_CD(); } #ifdef INTERLOCK // fetch the value of a register while handling any interlock cycles diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 3fac1963..dd7f9762 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -66,7 +66,7 @@ namespace melonDS::ARMInterpreter if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(offset, storeval); \ - cpu->AddCycles_CD(); \ + cpu->AddCycles_CD_STR(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -77,7 +77,7 @@ namespace melonDS::ARMInterpreter if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(addr, storeval); \ - cpu->AddCycles_CD(); \ + cpu->AddCycles_CD_STR(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -86,7 +86,7 @@ namespace melonDS::ARMInterpreter u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(offset, storeval); \ - cpu->AddCycles_CD(); \ + cpu->AddCycles_CD_STR(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -96,14 +96,14 @@ namespace melonDS::ARMInterpreter u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(addr, storeval); \ - cpu->AddCycles_CD(); \ + cpu->AddCycles_CD_STR(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDR \ offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead32(offset, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = ROR(val, ((offset&0x3)<<3)); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ @@ -122,7 +122,7 @@ namespace melonDS::ARMInterpreter #define A_LDR_POST \ u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = ROR(val, ((addr&0x3)<<3)); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ @@ -140,7 +140,7 @@ namespace melonDS::ARMInterpreter #define A_LDRB \ offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -155,7 +155,7 @@ namespace melonDS::ARMInterpreter #define A_LDRB_POST \ u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -252,7 +252,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(offset, storeval); \ - cpu->AddCycles_CD(); \ + cpu->AddCycles_CD_STR(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -261,7 +261,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(addr, storeval); \ - cpu->AddCycles_CD(); \ + cpu->AddCycles_CD_STR(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -272,9 +272,9 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (offset, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \ + if (!cpu->DataRead32 (offset, &cpu->R[r])) {cpu->AddCycles_CDI_LDR(); return;} \ u32 val; bool dataabort = !cpu->DataRead32S(offset+4, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDM(); \ if (dataabort) return; \ if (r == 14) \ cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ @@ -290,9 +290,9 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (addr, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \ + if (!cpu->DataRead32 (addr, &cpu->R[r])) {cpu->AddCycles_CDI_LDR(); return;} \ u32 val; bool dataabort = !cpu->DataRead32S(addr+4, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDM(); \ if (dataabort) return; \ if (r == 14) \ cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ @@ -311,7 +311,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) bool dataabort = !cpu->DataWrite32(offset, cpu->GetReg(r)); /* yes, this data abort behavior is on purpose */ \ u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (offset+4, storeval, dataabort); /* no, i dont understand it either */ \ - cpu->AddCycles_CD(); \ + cpu->AddCycles_CD_STM(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -323,14 +323,14 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) bool dataabort = !cpu->DataWrite32(addr, cpu->GetReg(r)); \ u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (addr+4, storeval, dataabort); \ - cpu->AddCycles_CD(); \ + cpu->AddCycles_CD_STM(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ cpu->JumpTo8_16Bit(val); \ @@ -344,7 +344,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRH_POST \ u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ cpu->JumpTo8_16Bit(val); \ @@ -358,7 +358,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRSB \ offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s8)val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -373,7 +373,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRSB_POST \ u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s8)val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -388,7 +388,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRSH \ offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s16)val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -403,7 +403,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRSH_POST \ u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ - cpu->AddCycles_CDI(); \ + cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s16)val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -462,7 +462,7 @@ void A_SWP(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite32(base, rm)) { - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; if (rd != 15) @@ -484,10 +484,10 @@ void A_SWP(ARM* cpu) else if (cpu->Num == 1) // for some reason these jumps don't work on the arm 9? cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1, cpu->ILT_Norm); } - else cpu->AddCycles_CDI(); + else cpu->AddCycles_CDI_LDR(); cpu->DataCycles += numD; } - else cpu->AddCycles_CDI(); + else cpu->AddCycles_CDI_LDR(); } void A_SWPB(ARM* cpu) @@ -502,7 +502,7 @@ void A_SWPB(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite8(base, rm)) { - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; if (rd != 15) @@ -516,10 +516,10 @@ void A_SWPB(ARM* cpu) else if (cpu->Num == 1)// for some reason these jumps don't work on the arm 9? cpu->JumpTo(val & ~1); } - else cpu->AddCycles_CDI(); + else cpu->AddCycles_CDI_LDR(); cpu->DataCycles += numD; } - else cpu->AddCycles_CDI(); + else cpu->AddCycles_CDI_LDR(); } @@ -582,7 +582,7 @@ void A_LDM(ARM* cpu) { goto dataabort; } - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDM(); if (!preinc) base += 4; @@ -591,7 +591,7 @@ void A_LDM(ARM* cpu) } else { - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDM(); if (cpu->Num == 0) { @@ -635,7 +635,7 @@ void A_LDM(ARM* cpu) if (false) { dataabort: - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDM(); // CHECKME: interlock shouldn't apply when it data aborts, right? // switch back to original set of regs @@ -728,7 +728,7 @@ void A_STM(ARM* cpu) cpu->R[baseid] = oldbase; } - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STM(); } @@ -743,7 +743,7 @@ void T_LDR_PCREL(ARM* cpu) // checkme: can pc be interlocked? u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count } @@ -753,7 +753,7 @@ void T_STR_REG(ARM* cpu) u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); cpu->DataWrite32(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STR(); } void T_STRB_REG(ARM* cpu) @@ -761,7 +761,7 @@ void T_STRB_REG(ARM* cpu) u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); cpu->DataWrite8(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STR(); } void T_LDR_REG(ARM* cpu) @@ -772,7 +772,7 @@ void T_LDR_REG(ARM* cpu) if (cpu->DataRead32(addr, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L(cpu->CurInstr & 0x7, (addr & 3) ? 2 : 1, cpu->ILT_Norm); } @@ -781,7 +781,7 @@ void T_LDRB_REG(ARM* cpu) u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -791,7 +791,7 @@ void T_STRH_REG(ARM* cpu) u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); cpu->DataWrite16(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STR(); } void T_LDRSB_REG(ARM* cpu) @@ -800,7 +800,7 @@ void T_LDRSB_REG(ARM* cpu) if (cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -809,7 +809,7 @@ void T_LDRH_REG(ARM* cpu) u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -819,7 +819,7 @@ void T_LDRSH_REG(ARM* cpu) if (cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -830,7 +830,7 @@ void T_STR_IMM(ARM* cpu) offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); cpu->DataWrite32(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STR(); } void T_LDR_IMM(ARM* cpu) @@ -841,7 +841,7 @@ void T_LDR_IMM(ARM* cpu) u32 val; if (cpu->DataRead32(offset, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L(cpu->CurInstr & 0x7, (offset & 3) ? 2 : 1, cpu->ILT_Norm); } @@ -851,7 +851,7 @@ void T_STRB_IMM(ARM* cpu) offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); cpu->DataWrite8(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STR(); } void T_LDRB_IMM(ARM* cpu) @@ -860,7 +860,7 @@ void T_LDRB_IMM(ARM* cpu) offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -871,7 +871,7 @@ void T_STRH_IMM(ARM* cpu) offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); cpu->DataWrite16(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STR(); } void T_LDRH_IMM(ARM* cpu) @@ -880,7 +880,7 @@ void T_LDRH_IMM(ARM* cpu) offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } @@ -891,7 +891,7 @@ void T_STR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode? offset += cpu->R[13]; cpu->DataWrite32(offset, cpu->GetReg((cpu->CurInstr >> 8) & 0x7, 1)); - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STR(); } void T_LDR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode? @@ -900,7 +900,7 @@ void T_LDR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode? offset += cpu->R[13]; cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDR(); cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count } @@ -949,7 +949,7 @@ void T_PUSH(ARM* cpu) cpu->R[13] = wbbase; dataabort: - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STM(); } void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? @@ -986,7 +986,8 @@ void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? } cpu->R[13] = base; - + + cpu->AddCycles_CDI_LDM(); if (cpu->Num == 0) { u32 lastbase = base - 4; @@ -997,7 +998,7 @@ void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? return; dataabort: - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDM(); } void T_STMIA(ARM* cpu) @@ -1022,7 +1023,7 @@ void T_STMIA(ARM* cpu) // TODO: check "Rb included in Rlist" case cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; dataabort: - cpu->AddCycles_CD(); + cpu->AddCycles_CD_STM(); } void T_LDMIA(ARM* cpu) @@ -1050,7 +1051,7 @@ void T_LDMIA(ARM* cpu) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDM(); if (cpu->Num == 0) { u32 lastbase = base - 4; @@ -1061,7 +1062,7 @@ void T_LDMIA(ARM* cpu) return; dataabort: - cpu->AddCycles_CDI(); + cpu->AddCycles_CDI_LDM(); } From c5b035a97314d10f7cfa54de8ead946acaf43dee Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 25 Jun 2024 11:20:01 -0400 Subject: [PATCH 046/115] SWP and SWPB use the same behavior as STR on the ARM9 --- src/ARM.h | 3 +++ src/ARMInterpreter_LoadStore.cpp | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ARM.h b/src/ARM.h index 38f60c6f..cb47f287 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -144,6 +144,7 @@ public: virtual void AddCycles_CI(s32 numI) = 0; virtual void AddCycles_CDI_LDR() = 0; virtual void AddCycles_CDI_LDM() = 0; + virtual void AddCycles_CDI_SWP() = 0; virtual void AddCycles_CD_STR() = 0; virtual void AddCycles_CD_STM() = 0; @@ -329,6 +330,7 @@ public: void AddCycles_CDI_LDR() override; void AddCycles_CDI_LDM() override; + void AddCycles_CDI_SWP() override { AddCycles_CD_STR(); } // uses the same behavior as str void AddCycles_CD_STR() override; void AddCycles_CD_STM() override; @@ -466,6 +468,7 @@ public: void AddCycles_CDI(); void AddCycles_CDI_LDR() override { AddCycles_CDI(); } void AddCycles_CDI_LDM() override { AddCycles_CDI(); } + void AddCycles_CDI_SWP() override { AddCycles_CDI(); } // checkme? void AddCycles_CD(); void AddCycles_CD_STR() override { AddCycles_CD(); } void AddCycles_CD_STM() override { AddCycles_CD(); } diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index dd7f9762..d874fb9a 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -462,7 +462,7 @@ void A_SWP(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite32(base, rm)) { - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI_SWP(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; if (rd != 15) @@ -484,10 +484,10 @@ void A_SWP(ARM* cpu) else if (cpu->Num == 1) // for some reason these jumps don't work on the arm 9? cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1, cpu->ILT_Norm); } - else cpu->AddCycles_CDI_LDR(); + else cpu->AddCycles_CDI_SWP(); cpu->DataCycles += numD; } - else cpu->AddCycles_CDI_LDR(); + else cpu->AddCycles_CDI_SWP(); } void A_SWPB(ARM* cpu) @@ -502,7 +502,7 @@ void A_SWPB(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite8(base, rm)) { - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI_SWP(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; if (rd != 15) @@ -516,10 +516,10 @@ void A_SWPB(ARM* cpu) else if (cpu->Num == 1)// for some reason these jumps don't work on the arm 9? cpu->JumpTo(val & ~1); } - else cpu->AddCycles_CDI_LDR(); + else cpu->AddCycles_CDI_SWP(); cpu->DataCycles += numD; } - else cpu->AddCycles_CDI_LDR(); + else cpu->AddCycles_CDI_SWP(); } From 88e5584b5f018458b0a683aa33d6d6e367a1a8f0 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 27 Jun 2024 13:02:38 -0400 Subject: [PATCH 047/115] fix clz r15 --- src/ARMInterpreter_ALU.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 44ee84aa..51b219d7 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -1078,7 +1078,8 @@ void A_CLZ(ARM* cpu) val |= 0x1; } - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + if ((cpu->CurInstr >> 12) & 0xF == 15) cpu->JumpTo(res & ~1); + else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->AddCycles_C(); } From a549977eb0c4823a1a900f60383e4d003402a3af Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 4 Jul 2024 11:04:38 -0400 Subject: [PATCH 048/115] fix clz for realsies --- src/ARMInterpreter_ALU.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 3e94d2af..e3208668 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -1078,7 +1078,7 @@ void A_CLZ(ARM* cpu) val |= 0x1; } - if ((cpu->CurInstr >> 12) & 0xF == 15) cpu->JumpTo(res & ~1); + if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1); else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->AddCycles_C(); } From bd1665c1d3602a3d7f6b327ddb0ecf0982340047 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 4 Jul 2024 11:15:37 -0400 Subject: [PATCH 049/115] minor timing tweaks --- src/ARM.cpp | 2 +- src/ARM.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 3ecae2c8..f667e0f6 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -647,7 +647,7 @@ void ARMv5::Execute() R[15] += 2; CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; - if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 0; } + if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 1; } else NextInstr[1] = CodeRead32(R[15], false); // actually execute diff --git a/src/ARM.h b/src/ARM.h index 8efb8fa6..20d11ad2 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -317,15 +317,16 @@ public: void AddCycles_C() override { // code only. always nonseq 32-bit for ARM9. - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numC = CodeCycles; Cycles += numC; } void AddCycles_CI(s32 numI) override { // code+internal - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - Cycles += numC + numI; + s32 numC = CodeCycles; + numI += 1; + Cycles += std::max(numC, numI); } void AddCycles_CDI_LDR() override; From ea429a1b8d04b53c0e0d9c33bdba8e613de0e88d Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 4 Jul 2024 12:58:58 -0400 Subject: [PATCH 050/115] improve interlock emulation add cycles to the instruction execution time rather than the timestamp directly. --- src/ARM.cpp | 9 +++++---- src/ARM.h | 10 ++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index f667e0f6..7d5a02c7 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -696,6 +696,7 @@ void ARMv5::Execute() NDS.ARM9Timestamp += Cycles; Cycles = 0; + CyclesILed = 0; } if (Halted == 2) @@ -1262,7 +1263,7 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) void ARMv5::AddCycles_CD_STR() { s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; + s32 numD = DataCycles + CyclesILed; s32 early; if (DataRegion == Mem9_ITCM) @@ -1287,7 +1288,7 @@ void ARMv5::AddCycles_CD_STR() void ARMv5::AddCycles_CD_STM() { s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; + s32 numD = DataCycles + CyclesILed; s32 early; if (DataRegion == Mem9_ITCM) @@ -1313,7 +1314,7 @@ void ARMv5::AddCycles_CDI_LDR() { // LDR cycles. ARM9 seems to skip the internal cycle here. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; + s32 numD = DataCycles + CyclesILed; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early s32 early; @@ -1340,7 +1341,7 @@ void ARMv5::AddCycles_CDI_LDM() { // LDM cycles. ARM9 seems to skip the internal cycle here. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; + s32 numD = DataCycles + CyclesILed; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early s32 early; diff --git a/src/ARM.h b/src/ARM.h index 20d11ad2..a76a6d09 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -30,7 +30,7 @@ #include "debug/GdbStub.h" #endif -//#define INTERLOCK +#define INTERLOCK namespace melonDS { @@ -318,14 +318,14 @@ public: { // code only. always nonseq 32-bit for ARM9. s32 numC = CodeCycles; - Cycles += numC; + Cycles += std::max(numC, CyclesILed + 1); } void AddCycles_CI(s32 numI) override { // code+internal s32 numC = CodeCycles; - numI += 1; + numI += 1 + CyclesILed; Cycles += std::max(numC, numI); } @@ -340,7 +340,7 @@ public: inline u32 GetReg(const u32 reg, const u32 delay = 0) override { if (InterlockTimestamp[reg] > (Timestamp() + delay)) - Timestamp() = InterlockTimestamp[reg] - delay; + CyclesILed = InterlockTimestamp[reg] - (Timestamp() + delay); return R[reg]; } @@ -417,6 +417,8 @@ public: bool (*GetMemRegion)(u32 addr, bool write, MemRegion* region); + s32 CyclesILed; + #ifdef GDBSTUB_ENABLED u32 ReadMem(u32 addr, int size) override; void WriteMem(u32 addr, int size, u32 v) override; From 0f02c0bbbad3828638ff22f49e57ae6c21e51e2e Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 6 Jul 2024 12:13:41 -0400 Subject: [PATCH 051/115] disable interlock emulation again again our understanding of how it works is just too incomplete to be worth implementing yet --- src/ARM.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ARM.h b/src/ARM.h index a76a6d09..25889329 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -30,7 +30,7 @@ #include "debug/GdbStub.h" #endif -#define INTERLOCK +//#define INTERLOCK namespace melonDS { From 383750692e95fb40f60cca0b3208af5414ad53eb Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 6 Jul 2024 12:38:39 -0400 Subject: [PATCH 052/115] doesn't really matter but idk it's more correct? --- src/ARM.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 7d5a02c7..2c56d505 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1262,7 +1262,7 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) void ARMv5::AddCycles_CD_STR() { - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numC = CodeCycles; s32 numD = DataCycles + CyclesILed; s32 early; @@ -1287,7 +1287,7 @@ void ARMv5::AddCycles_CD_STR() void ARMv5::AddCycles_CD_STM() { - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numC = CodeCycles; s32 numD = DataCycles + CyclesILed; s32 early; @@ -1313,7 +1313,7 @@ void ARMv5::AddCycles_CD_STM() void ARMv5::AddCycles_CDI_LDR() { // LDR cycles. ARM9 seems to skip the internal cycle here. - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numC = CodeCycles; s32 numD = DataCycles + CyclesILed; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early @@ -1340,7 +1340,7 @@ void ARMv5::AddCycles_CDI_LDR() void ARMv5::AddCycles_CDI_LDM() { // LDM cycles. ARM9 seems to skip the internal cycle here. - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numC = CodeCycles; s32 numD = DataCycles + CyclesILed; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early From e2be0b4f93cfe310a53dfc034a7d4f047a02962e Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 7 Jul 2024 15:41:14 -0400 Subject: [PATCH 053/115] actually no it was not more correct undo previous commit because actually code cycles *do* matter --- src/ARM.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 2c56d505..7d5a02c7 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1262,7 +1262,7 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) void ARMv5::AddCycles_CD_STR() { - s32 numC = CodeCycles; + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; s32 numD = DataCycles + CyclesILed; s32 early; @@ -1287,7 +1287,7 @@ void ARMv5::AddCycles_CD_STR() void ARMv5::AddCycles_CD_STM() { - s32 numC = CodeCycles; + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; s32 numD = DataCycles + CyclesILed; s32 early; @@ -1313,7 +1313,7 @@ void ARMv5::AddCycles_CD_STM() void ARMv5::AddCycles_CDI_LDR() { // LDR cycles. ARM9 seems to skip the internal cycle here. - s32 numC = CodeCycles; + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; s32 numD = DataCycles + CyclesILed; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early @@ -1340,7 +1340,7 @@ void ARMv5::AddCycles_CDI_LDR() void ARMv5::AddCycles_CDI_LDM() { // LDM cycles. ARM9 seems to skip the internal cycle here. - s32 numC = CodeCycles; + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; s32 numD = DataCycles + CyclesILed; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early From 1fdac1d489089019b1d495ce948e40ab21ab98e8 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 11 Jul 2024 16:18:55 -0400 Subject: [PATCH 054/115] ...why am i checking for dtcm? --- src/ARM.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 7d5a02c7..7e0e9228 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -304,7 +304,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) u32 newregion = addr >> 24; if (addr < ITCMSize) CodeRegion = Mem9_ITCM; - else if ((addr & DTCMMask) == DTCMBase) CodeRegion = Mem9_DTCM; else CodeRegion = NDS.ARM9Regions[addr >> 14]; RegionCodeCycles = MemTimings[addr >> 12][0]; From 038ffa3a3598c03b156dc8626f46738fead16728 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 11 Jul 2024 20:06:56 -0400 Subject: [PATCH 055/115] revert the *entire* interlock implemention too slow, not accurate enough. we need to do a *lot* more research into the specifics of how this works with all the various aspects of the cpu's timings before we can make a good implementation --- src/ARM.cpp | 21 +- src/ARM.h | 88 +------- src/ARMInterpreter.cpp | 4 +- src/ARMInterpreter_ALU.cpp | 351 +++++++++++++++---------------- src/ARMInterpreter_Branch.cpp | 16 +- src/ARMInterpreter_LoadStore.cpp | 344 ++++++++++-------------------- 6 files changed, 290 insertions(+), 534 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 7e0e9228..e1f93a58 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -190,8 +190,6 @@ void ARM::Reset() BreakReq = false; #endif - memset(InterlockTimestamp, 0, sizeof(InterlockTimestamp)); - // zorp JumpTo(ExceptionBase); } @@ -695,7 +693,6 @@ void ARMv5::Execute() NDS.ARM9Timestamp += Cycles; Cycles = 0; - CyclesILed = 0; } if (Halted == 2) @@ -1262,7 +1259,7 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) void ARMv5::AddCycles_CD_STR() { s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles + CyclesILed; + s32 numD = DataCycles; s32 early; if (DataRegion == Mem9_ITCM) @@ -1287,7 +1284,7 @@ void ARMv5::AddCycles_CD_STR() void ARMv5::AddCycles_CD_STM() { s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles + CyclesILed; + s32 numD = DataCycles; s32 early; if (DataRegion == Mem9_ITCM) @@ -1313,7 +1310,7 @@ void ARMv5::AddCycles_CDI_LDR() { // LDR cycles. ARM9 seems to skip the internal cycle here. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles + CyclesILed; + s32 numD = DataCycles; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early s32 early; @@ -1340,7 +1337,7 @@ void ARMv5::AddCycles_CDI_LDM() { // LDM cycles. ARM9 seems to skip the internal cycle here. s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles + CyclesILed; + s32 numD = DataCycles; // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early s32 early; @@ -1442,16 +1439,6 @@ void ARMv4::AddCycles_CD() } } -u64& ARMv5::Timestamp() -{ - return NDS.ARM9Timestamp; -} - -u64& ARMv4::Timestamp() -{ - return NDS.ARM7Timestamp; -} - u8 ARMv5::BusRead8(u32 addr) { return NDS.ARM9Read8(addr); diff --git a/src/ARM.h b/src/ARM.h index 25889329..dae5d96a 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -30,8 +30,6 @@ #include "debug/GdbStub.h" #endif -//#define INTERLOCK - namespace melonDS { inline u32 ROR(u32 x, u32 n) @@ -148,46 +146,6 @@ public: virtual void AddCycles_CD_STR() = 0; virtual void AddCycles_CD_STM() = 0; -/* - inline void AddCycles_L(const u32 delay, const u32 reg1) - { - if (InterlockTimestamp[reg1] > Timestamp() + delay); - Timestamp() = InterlockTimestamp[reg1]; - } - - inline void AddCycles_L(const u32 delay, const u32 reg1, const u32 reg2) - { - u64 cycles = std::max(InterlockTimestamp[reg1], InterlockTimestamp[reg2]); - if (cycles > Timestamp() + delay) - Timestamp() = cycles; - } - - inline void AddCycles_L(const u32 delay, const u32 reg1, const u32 reg2, const u32 reg3) - { - u64 cycles = std::max(InterlockTimestamp[reg1], std::max(InterlockTimestamp[reg2], InterlockTimestamp[reg3])); - if (cycles > Timestamp() + delay) - Timestamp() = cycles; - }*/ - -#ifdef INTERLOCK - // fetch the value of a register while handling any interlock cycles - virtual inline u32 GetReg(const u32 reg, const u32 delay = 0) = 0; - - // Must be called after all of an instruction's cycles are calculated!!! - virtual inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) = 0; -#else - // fetch the value of a register while handling any interlock cycles - inline u32 GetReg(const u32 reg, const u32 delay = 0) - { - return R[reg]; - } - - // Must be called after all of an instruction's cycles are calculated!!! - inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) {} -#endif - - virtual u64& Timestamp() = 0; - void CheckGdbIncoming(); u32 Num; @@ -224,15 +182,6 @@ public: MemRegion CodeMem; - enum InterlockType - { - ILT_Norm = 0, - ILT_Mul = 1, - }; - - u8 InterlockType[16]; - u64 InterlockTimestamp[16]; - #ifdef JIT_ENABLED u32 FastBlockLookupStart, FastBlockLookupSize; u64* FastBlockLookup; @@ -318,14 +267,14 @@ public: { // code only. always nonseq 32-bit for ARM9. s32 numC = CodeCycles; - Cycles += std::max(numC, CyclesILed + 1); + Cycles += numC; } void AddCycles_CI(s32 numI) override { // code+internal s32 numC = CodeCycles; - numI += 1 + CyclesILed; + numI += 1; Cycles += std::max(numC, numI); } @@ -334,25 +283,6 @@ public: void AddCycles_CDI_SWP() override { AddCycles_CD_STR(); } // uses the same behavior as str void AddCycles_CD_STR() override; void AddCycles_CD_STM() override; - -#ifdef INTERLOCK - // fetch the value of a register while handling any interlock cycles - inline u32 GetReg(const u32 reg, const u32 delay = 0) override - { - if (InterlockTimestamp[reg] > (Timestamp() + delay)) - CyclesILed = InterlockTimestamp[reg] - (Timestamp() + delay); - return R[reg]; - } - - // Must be called after all of an instruction's cycles are calculated!!! - inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override - { - InterlockTimestamp[reg] = cycles + Timestamp() + Cycles; - //InterlockType[reg] = type; - } -#endif - - u64& Timestamp() override; void GetCodeMemRegion(u32 addr, MemRegion* region); @@ -417,8 +347,6 @@ public: bool (*GetMemRegion)(u32 addr, bool write, MemRegion* region); - s32 CyclesILed; - #ifdef GDBSTUB_ENABLED u32 ReadMem(u32 addr, int size) override; void WriteMem(u32 addr, int size, u32 v) override; @@ -476,18 +404,6 @@ public: void AddCycles_CD_STR() override { AddCycles_CD(); } void AddCycles_CD_STM() override { AddCycles_CD(); } -#ifdef INTERLOCK - // fetch the value of a register while handling any interlock cycles - inline u32 GetReg(const u32 reg, const u32 delay = 0) override - { - return R[reg]; - } - - // Must be called after all of an instruction's cycles are calculated!!! - inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override{} -#endif - - u64& Timestamp() override; protected: u8 BusRead8(u32 addr) override; u16 BusRead16(u32 addr) override; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 93b347b5..f9623147 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -163,7 +163,7 @@ void A_MSR_REG(ARM* cpu) if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00; - u32 val = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 val = cpu->R[cpu->CurInstr & 0xF]; // bit4 is forced to 1 val |= 0x00000010; @@ -216,7 +216,7 @@ void A_MCR(ARM* cpu) u32 cn = (cpu->CurInstr >> 16) & 0xF; u32 cm = cpu->CurInstr & 0xF; u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; - u32 val = cpu->GetReg((cpu->CurInstr>>12)&0xF); + u32 val = cpu->R[(cpu->CurInstr>>12)&0xF]; if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4; if (cpu->Num==0 && cp==15) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index e3208668..bc655996 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -160,14 +160,14 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry) cpu->SetC(b & 0x80000000); #define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \ - u32 b = cpu->GetReg(cpu->CurInstr&0xF); \ + u32 b = cpu->R[cpu->CurInstr&0xF]; \ u32 s = (cpu->CurInstr>>7)&0x1F; \ shiftop(b, s); #define A_CALC_OP2_REG_SHIFT_REG(shiftop) \ - u32 b = cpu->GetReg(cpu->CurInstr&0xF); \ + u32 b = cpu->R[cpu->CurInstr&0xF]; \ if ((cpu->CurInstr&0xF)==15) b += 4; \ - shiftop(b, (cpu->GetReg((cpu->CurInstr>>8)&0xF) & 0xFF)); + shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF)); #define A_IMPLEMENT_ALU_OP(x,s) \ @@ -313,7 +313,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ #define A_AND(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -326,7 +326,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \ } #define A_AND_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -344,7 +344,7 @@ A_IMPLEMENT_ALU_OP(AND,_S) #define A_EOR(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -357,7 +357,7 @@ A_IMPLEMENT_ALU_OP(AND,_S) } #define A_EOR_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -375,7 +375,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) #define A_SUB(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -388,7 +388,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S) } #define A_SUB_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -408,7 +408,7 @@ A_IMPLEMENT_ALU_OP(SUB,) #define A_RSB(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -421,7 +421,7 @@ A_IMPLEMENT_ALU_OP(SUB,) } #define A_RSB_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -441,7 +441,7 @@ A_IMPLEMENT_ALU_OP(RSB,) #define A_ADD(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -454,7 +454,7 @@ A_IMPLEMENT_ALU_OP(RSB,) } #define A_ADD_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -474,7 +474,7 @@ A_IMPLEMENT_ALU_OP(ADD,) #define A_ADC(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -487,7 +487,7 @@ A_IMPLEMENT_ALU_OP(ADD,) } #define A_ADC_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res_tmp = a + b; \ u32 carry = (cpu->CPSR&0x20000000 ? 1:0); \ u32 res = res_tmp + carry; \ @@ -509,7 +509,7 @@ A_IMPLEMENT_ALU_OP(ADC,) #define A_SBC(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -522,7 +522,7 @@ A_IMPLEMENT_ALU_OP(ADC,) } #define A_SBC_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res_tmp = a - b; \ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = res_tmp - carry; \ @@ -544,7 +544,7 @@ A_IMPLEMENT_ALU_OP(SBC,) #define A_RSC(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -557,7 +557,7 @@ A_IMPLEMENT_ALU_OP(SBC,) } #define A_RSC_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res_tmp = b - a; \ u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = res_tmp - carry; \ @@ -579,7 +579,7 @@ A_IMPLEMENT_ALU_OP(RSC,) #define A_TST(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -589,7 +589,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S) #define A_TEQ(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -599,7 +599,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) #define A_CMP(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -611,7 +611,7 @@ A_IMPLEMENT_ALU_TEST(CMP,) #define A_CMN(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ cpu->SetNZCV(res & 0x80000000, \ !res, \ @@ -623,7 +623,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) #define A_ORR(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a | b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -636,7 +636,7 @@ A_IMPLEMENT_ALU_TEST(CMN,) } #define A_ORR_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a | b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -699,7 +699,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) #define A_BIC(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & ~b; \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ @@ -712,7 +712,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu) } #define A_BIC_S(c) \ - u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & ~b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ @@ -761,12 +761,18 @@ A_IMPLEMENT_ALU_OP(MVN,_S) void A_MUL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; u32 res = rm * rs; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ(res & 0x80000000, + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -780,55 +786,53 @@ void A_MUL(ARM* cpu) } cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ(res & 0x80000000, - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_MLA(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF); - u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; u32 res = (rm * rs) + rn; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - - u32 cycles; - if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; - else - { - if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; - else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; - else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; - else cycles = 5; - } - - cpu->AddCycles_CI(cycles); if (cpu->CurInstr & (1<<20)) { cpu->SetNZ(res & 0x80000000, !res); if (cpu->Num==1) cpu->SetC(0); } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions + + u32 cycles; + if (cpu->Num == 0) + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + else + { + if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; + else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; + else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; + else cycles = 5; + } + + cpu->AddCycles_CI(cycles); } void A_UMULL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; u64 res = (u64)rm * (u64)rs; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -842,27 +846,26 @@ void A_UMULL(ARM* cpu) } cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_UMLAL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; u64 res = (u64)rm * (u64)rs; - u64 rd = (u64)cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1) | ((u64)cpu->GetReg((cpu->CurInstr >> 16) & 0xF) << 32ULL); + u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); res += rd; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -876,24 +879,23 @@ void A_UMLAL(ARM* cpu) } cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMULL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; s64 res = (s64)(s32)rm * (s64)(s32)rs; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -907,27 +909,26 @@ void A_SMULL(ARM* cpu) } cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMLAL(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; s64 res = (s64)(s32)rm * (s64)(s32)rs; - s64 rd = (s64)((u64)cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1) | ((u64)cpu->GetReg((cpu->CurInstr >> 16) & 0xF) << 32ULL)); + s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); res += rd; cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) + { + cpu->SetNZ((u32)(res >> 63ULL), + !res); + if (cpu->Num==1) cpu->SetC(0); + } u32 cycles; if (cpu->Num == 0) @@ -939,24 +940,17 @@ void A_SMLAL(ARM* cpu) else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; } - + cpu->AddCycles_CI(cycles); - if (cpu->CurInstr & (1<<20)) - { - cpu->SetNZ((u32)(res >> 63ULL), - !res); - if (cpu->Num==1) cpu->SetC(0); - } - else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions } void A_SMLAxy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; @@ -970,17 +964,16 @@ void A_SMLAxy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMLAWy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; if (cpu->CurInstr & (1<<6)) rs >>= 16; else rs &= 0xFFFF; @@ -992,16 +985,15 @@ void A_SMLAWy(ARM* cpu) if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMULxy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; @@ -1011,16 +1003,15 @@ void A_SMULxy(ARM* cpu) u32 res = ((s16)rm * (s16)rs); cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMULWy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; if (cpu->CurInstr & (1<<6)) rs >>= 16; else rs &= 0xFFFF; @@ -1028,16 +1019,15 @@ void A_SMULWy(ARM* cpu) u32 res = ((s64)(s32)rm * (s16)rs) >> 16; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_SMLALxy(ARM* cpu) { if (cpu->Num != 0) return; - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 0); - u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 0); // yeah this one actually doesn't need two interlock cycles to interlock + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; if (cpu->CurInstr & (1<<5)) rm >>= 16; else rm &= 0xFFFF; @@ -1052,8 +1042,7 @@ void A_SMLALxy(ARM* cpu) cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->AddCycles_CI(1); - cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_CI(1); // TODO: interlock?? } @@ -1062,7 +1051,7 @@ void A_CLZ(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 val = cpu->GetReg(cpu->CurInstr & 0xF, 1); + u32 val = cpu->R[cpu->CurInstr & 0xF]; u32 res = 0; while ((val & 0xFF000000) == 0) @@ -1087,8 +1076,8 @@ void A_QADD(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 res = rm + rn; if (OverflowAdd(rm, rn)) @@ -1098,16 +1087,15 @@ void A_QADD(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_QSUB(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 res = rm - rn; if (OverflowSub(rm, rn)) @@ -1117,16 +1105,15 @@ void A_QSUB(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_QDADD(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; if (OverflowAdd(rn, rn)) { @@ -1144,16 +1131,15 @@ void A_QDADD(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } void A_QDSUB(ARM* cpu) { if (cpu->Num != 0) return A_UNK(cpu); - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1); + u32 rm = cpu->R[cpu->CurInstr & 0xF]; + u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF]; if (OverflowAdd(rn, rn)) { @@ -1171,8 +1157,7 @@ void A_QDSUB(ARM* cpu) } cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->AddCycles_C(); - cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm); + cpu->AddCycles_C(); // TODO: interlock?? } @@ -1183,7 +1168,7 @@ void A_QDSUB(ARM* cpu) void T_LSL_IMM(ARM* cpu) { - u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 s = (cpu->CurInstr >> 6) & 0x1F; LSL_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; @@ -1194,7 +1179,7 @@ void T_LSL_IMM(ARM* cpu) void T_LSR_IMM(ARM* cpu) { - u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 s = (cpu->CurInstr >> 6) & 0x1F; LSR_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; @@ -1205,7 +1190,7 @@ void T_LSR_IMM(ARM* cpu) void T_ASR_IMM(ARM* cpu) { - u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 s = (cpu->CurInstr >> 6) & 0x1F; ASR_IMM_S(op, s); cpu->R[cpu->CurInstr & 0x7] = op; @@ -1216,8 +1201,8 @@ void T_ASR_IMM(ARM* cpu) void T_ADD_REG_(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; u32 res = a + b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, @@ -1229,8 +1214,8 @@ void T_ADD_REG_(ARM* cpu) void T_SUB_REG_(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7]; u32 res = a - b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, @@ -1242,7 +1227,7 @@ void T_SUB_REG_(ARM* cpu) void T_ADD_IMM_(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 b = (cpu->CurInstr >> 6) & 0x7; u32 res = a + b; cpu->R[cpu->CurInstr & 0x7] = res; @@ -1255,7 +1240,7 @@ void T_ADD_IMM_(ARM* cpu) void T_SUB_IMM_(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 b = (cpu->CurInstr >> 6) & 0x7; u32 res = a - b; cpu->R[cpu->CurInstr & 0x7] = res; @@ -1275,9 +1260,9 @@ void T_MOV_IMM(ARM* cpu) cpu->AddCycles_C(); } -void T_CMP_IMM(ARM* cpu) +void T_CMP_IMM(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; u32 res = a - b; cpu->SetNZCV(res & 0x80000000, @@ -1289,7 +1274,7 @@ void T_CMP_IMM(ARM* cpu) void T_ADD_IMM(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; u32 res = a + b; cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; @@ -1302,7 +1287,7 @@ void T_ADD_IMM(ARM* cpu) void T_SUB_IMM(ARM* cpu) { - u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 b = cpu->CurInstr & 0xFF; u32 res = a - b; cpu->R[(cpu->CurInstr >> 8) & 0x7] = res; @@ -1316,8 +1301,8 @@ void T_SUB_IMM(ARM* cpu) void T_AND_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a & b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1327,8 +1312,8 @@ void T_AND_REG(ARM* cpu) void T_EOR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a ^ b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1338,8 +1323,8 @@ void T_EOR_REG(ARM* cpu) void T_LSL_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; LSL_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1349,8 +1334,8 @@ void T_LSL_REG(ARM* cpu) void T_LSR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; LSR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1360,8 +1345,8 @@ void T_LSR_REG(ARM* cpu) void T_ASR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; ASR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1371,8 +1356,8 @@ void T_ASR_REG(ARM* cpu) void T_ADC_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res_tmp = a + b; u32 carry = (cpu->CPSR&0x20000000 ? 1:0); u32 res = res_tmp + carry; @@ -1386,8 +1371,8 @@ void T_ADC_REG(ARM* cpu) void T_SBC_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res_tmp = a - b; u32 carry = (cpu->CPSR&0x20000000 ? 0:1); u32 res = res_tmp - carry; @@ -1401,8 +1386,8 @@ void T_SBC_REG(ARM* cpu) void T_ROR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF; + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF; ROR_REG_S(a, b); cpu->R[cpu->CurInstr & 0x7] = a; cpu->SetNZ(a & 0x80000000, @@ -1412,8 +1397,8 @@ void T_ROR_REG(ARM* cpu) void T_TST_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a & b; cpu->SetNZ(res & 0x80000000, !res); @@ -1422,7 +1407,7 @@ void T_TST_REG(ARM* cpu) void T_NEG_REG(ARM* cpu) { - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = -b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZCV(res & 0x80000000, @@ -1434,8 +1419,8 @@ void T_NEG_REG(ARM* cpu) void T_CMP_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a - b; cpu->SetNZCV(res & 0x80000000, !res, @@ -1446,8 +1431,8 @@ void T_CMP_REG(ARM* cpu) void T_CMN_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a + b; cpu->SetNZCV(res & 0x80000000, !res, @@ -1458,8 +1443,8 @@ void T_CMN_REG(ARM* cpu) void T_ORR_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a | b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1469,8 +1454,8 @@ void T_ORR_REG(ARM* cpu) void T_MUL_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a * b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1494,8 +1479,8 @@ void T_MUL_REG(ARM* cpu) void T_BIC_REG(ARM* cpu) { - u32 a = cpu->GetReg(cpu->CurInstr & 0x7); - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 a = cpu->R[cpu->CurInstr & 0x7]; + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = a & ~b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1505,7 +1490,7 @@ void T_BIC_REG(ARM* cpu) void T_MVN_REG(ARM* cpu) { - u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 res = ~b; cpu->R[cpu->CurInstr & 0x7] = res; cpu->SetNZ(res & 0x80000000, @@ -1522,8 +1507,8 @@ void T_ADD_HIREG(ARM* cpu) u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; - u32 a = cpu->GetReg(rd); - u32 b = cpu->GetReg(rs); + u32 a = cpu->R[rd]; + u32 b = cpu->R[rs]; cpu->AddCycles_C(); @@ -1542,8 +1527,8 @@ void T_CMP_HIREG(ARM* cpu) u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rs = (cpu->CurInstr >> 3) & 0xF; - u32 a = cpu->GetReg(rd); - u32 b = cpu->GetReg(rs); + u32 a = cpu->R[rd]; + u32 b = cpu->R[rs]; u32 res = a - b; cpu->SetNZCV(res & 0x80000000, @@ -1562,11 +1547,11 @@ void T_MOV_HIREG(ARM* cpu) if (rd == 15) { - cpu->JumpTo(cpu->GetReg(rs) | 1); + cpu->JumpTo(cpu->R[rs] | 1); } else { - cpu->R[rd] = cpu->GetReg(rs); + cpu->R[rd] = cpu->R[rs]; } // nocash-style debugging hook @@ -1583,7 +1568,7 @@ void T_MOV_HIREG(ARM* cpu) } -void T_ADD_PCREL(ARM* cpu) // checkme: pc shouldn't be able to interlock? +void T_ADD_PCREL(ARM* cpu) { u32 val = cpu->R[15] & ~2; val += ((cpu->CurInstr & 0xFF) << 2); @@ -1591,7 +1576,7 @@ void T_ADD_PCREL(ARM* cpu) // checkme: pc shouldn't be able to interlock? cpu->AddCycles_C(); } -void T_ADD_SPREL(ARM* cpu) // checkme: sp shouldn't be able to interlock in thumb? +void T_ADD_SPREL(ARM* cpu) { u32 val = cpu->R[13]; val += ((cpu->CurInstr & 0xFF) << 2); @@ -1599,7 +1584,7 @@ void T_ADD_SPREL(ARM* cpu) // checkme: sp shouldn't be able to interlock in thum cpu->AddCycles_C(); } -void T_ADD_SP(ARM* cpu) // checkme: sp shouldn't be able to interlock in thumb? +void T_ADD_SP(ARM* cpu) { u32 val = cpu->R[13]; if (cpu->CurInstr & (1<<7)) diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp index 284dfa75..623be41a 100644 --- a/src/ARMInterpreter_Branch.cpp +++ b/src/ARMInterpreter_Branch.cpp @@ -46,15 +46,15 @@ void A_BLX_IMM(ARM* cpu) cpu->JumpTo(cpu->R[15] + offset + 1); } -void A_BX(ARM* cpu) // verify interlock +void A_BX(ARM* cpu) { - cpu->JumpTo(cpu->GetReg(cpu->CurInstr & 0xF)); + cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); } -void A_BLX_REG(ARM* cpu) // verify interlock +void A_BLX_REG(ARM* cpu) { u32 lr = cpu->R[15] - 4; - cpu->JumpTo(cpu->GetReg(cpu->CurInstr & 0xF)); + cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); cpu->R[14] = lr; } @@ -71,12 +71,12 @@ void T_BCOND(ARM* cpu) cpu->AddCycles_C(); } -void T_BX(ARM* cpu) // verify interlock +void T_BX(ARM* cpu) { - cpu->JumpTo(cpu->GetReg((cpu->CurInstr >> 3) & 0xF)); + cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); } -void T_BLX_REG(ARM* cpu) // verify interlock +void T_BLX_REG(ARM* cpu) { if (cpu->Num==1) { @@ -85,7 +85,7 @@ void T_BLX_REG(ARM* cpu) // verify interlock } u32 lr = cpu->R[15] - 1; - cpu->JumpTo(cpu->GetReg((cpu->CurInstr >> 3) & 0xF)); + cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); cpu->R[14] = lr; } diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 4a640bc5..e2726005 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -53,7 +53,7 @@ namespace melonDS::ARMInterpreter if (!(cpu->CurInstr & (1<<23))) offset = -offset; #define A_WB_CALC_OFFSET_REG(shiftop) \ - u32 offset = cpu->GetReg(cpu->CurInstr & 0xF); \ + u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ u32 shift = ((cpu->CurInstr>>7)&0x1F); \ shiftop(offset, shift); \ if (!(cpu->CurInstr & (1<<23))) offset = -offset; @@ -61,8 +61,8 @@ namespace melonDS::ARMInterpreter #define A_STR \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(offset, storeval); \ @@ -72,8 +72,8 @@ namespace melonDS::ARMInterpreter // TODO: user mode (bit21) #define A_STR_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(addr, storeval); \ @@ -82,8 +82,8 @@ namespace melonDS::ARMInterpreter cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRB \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(offset, storeval); \ cpu->AddCycles_CD_STR(); \ @@ -92,8 +92,8 @@ namespace melonDS::ARMInterpreter // TODO: user mode (bit21) #define A_STRB_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(addr, storeval); \ cpu->AddCycles_CD_STR(); \ @@ -101,7 +101,7 @@ namespace melonDS::ARMInterpreter cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDR \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead32(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ @@ -115,12 +115,11 @@ namespace melonDS::ARMInterpreter else \ { \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (offset & 3) ? 2 : 1, cpu->ILT_Norm); \ } // TODO: user mode #define A_LDR_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ @@ -134,37 +133,26 @@ namespace melonDS::ARMInterpreter else \ { \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (addr & 3) ? 2 : 1, cpu->ILT_Norm); \ } #define A_LDRB \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; // TODO: user mode #define A_LDRB_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; @@ -242,14 +230,14 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (!(cpu->CurInstr & (1<<23))) offset = -offset; #define A_HD_CALC_OFFSET_REG \ - u32 offset = cpu->GetReg(cpu->CurInstr & 0xF); \ + u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ if (!(cpu->CurInstr & (1<<23))) offset = -offset; #define A_STRH \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(offset, storeval); \ cpu->AddCycles_CD_STR(); \ @@ -257,8 +245,8 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_STRH_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ - u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ + u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(addr, storeval); \ cpu->AddCycles_CD_STR(); \ @@ -269,47 +257,35 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRD \ if (cpu->Num != 0) return; \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (offset, &cpu->R[r])) {cpu->AddCycles_CDI_LDR(); return;} \ - u32 val; bool dataabort = !cpu->DataRead32S(offset+4, &val); \ + if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ + u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ + else cpu->R[r+1] = val; \ cpu->AddCycles_CDI_LDM(); \ - if (dataabort) return; \ - if (r == 14) \ - cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ - else \ - { \ - cpu->R[r+1] = val; \ - cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \ - } \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ + if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRD_POST \ if (cpu->Num != 0) return; \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (addr, &cpu->R[r])) {cpu->AddCycles_CDI_LDR(); return;} \ - u32 val; bool dataabort = !cpu->DataRead32S(addr+4, &val); \ + if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ + u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ + if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ + else cpu->R[r+1] = val; \ cpu->AddCycles_CDI_LDM(); \ - if (dataabort) return; \ - if (r == 14) \ - cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ - else \ - { \ - cpu->R[r+1] = val; \ - cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \ - } \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRD \ if (cpu->Num != 0) return; \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(offset, cpu->GetReg(r)); /* yes, this data abort behavior is on purpose */ \ - u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \ + bool dataabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ + u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (offset+4, storeval, dataabort); /* no, i dont understand it either */ \ cpu->AddCycles_CD_STM(); \ if (dataabort) return; \ @@ -317,102 +293,72 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRD_POST \ if (cpu->Num != 0) return; \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(addr, cpu->GetReg(r)); \ - u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \ + bool dataabort = !cpu->DataWrite32(addr, cpu->R[r]); \ + u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (addr+4, storeval, dataabort); \ cpu->AddCycles_CD_STM(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRH_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSB \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSB_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRSH \ - offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRSH_POST \ - u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \ + u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ cpu->AddCycles_CDI_LDR(); \ if (dataabort) return; \ val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - cpu->JumpTo8_16Bit(val); \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \ - } \ + if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ + else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -452,8 +398,8 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH) void A_SWP(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1); - u32 base = cpu->GetReg((cpu->CurInstr >> 16) & 0xF); + u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->R[cpu->CurInstr & 0xF]; if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; @@ -462,38 +408,20 @@ void A_SWP(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite32(base, rm)) { - cpu->AddCycles_CDI_SWP(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; - if (rd != 15) - { - cpu->R[rd] = ROR(val, 8*(base&0x3)); - - if (cpu->Num == 0) - { - u32 cycles; - if (base & 3) // add an extra interlock cycle when doing a misaligned load from a non-itcm address (checkme: does it matter whether you're executing from there?) - { - cycles = ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2; - } - else cycles = 1; - - cpu->SetCycles_L(rd, cycles, cpu->ILT_Norm); - } - } - else if (cpu->Num == 1) // for some reason these jumps don't work on the arm 9? - cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1, cpu->ILT_Norm); + if (rd != 15) cpu->R[rd] = ROR(val, 8*(base&0x3)); + else if (cpu->Num==1) cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1); // for some reason these jumps don't work on the arm 9? } - else cpu->AddCycles_CDI_SWP(); cpu->DataCycles += numD; } - else cpu->AddCycles_CDI_SWP(); + cpu->AddCycles_CDI_SWP(); } void A_SWPB(ARM* cpu) { - u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1) & 0xFF; - u32 base = cpu->GetReg((cpu->CurInstr >> 16) & 0xF); + u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; + u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF; if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; @@ -502,24 +430,14 @@ void A_SWPB(ARM* cpu) u32 numD = cpu->DataCycles; if (cpu->DataWrite8(base, rm)) { - cpu->AddCycles_CDI_SWP(); // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; - if (rd != 15) - { - cpu->R[rd] = val; - - // add an extra interlock cycle when doing a load from a non-itcm address (checkme: does it matter whether you're executing from there?) - if (cpu->Num == 0) - cpu->SetCycles_L(rd, ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2, cpu->ILT_Norm); - } - else if (cpu->Num == 1)// for some reason these jumps don't work on the arm 9? - cpu->JumpTo(val & ~1); + if (rd != 15) cpu->R[rd] = val; + else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't work on the arm 9? } - else cpu->AddCycles_CDI_SWP(); cpu->DataCycles += numD; } - else cpu->AddCycles_CDI_SWP(); + cpu->AddCycles_CDI_SWP(); } @@ -527,12 +445,11 @@ void A_SWPB(ARM* cpu) void A_LDM(ARM* cpu) { u32 baseid = (cpu->CurInstr >> 16) & 0xF; - u32 base = cpu->GetReg(baseid, 1); + u32 base = cpu->R[baseid]; u32 wbbase; u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; - u32 lastreg = 0; // TODO: this doesn't support 0 reg LDMs (do those even work?) if (!(cpu->CurInstr & (1<<23))) // decrement { @@ -568,7 +485,6 @@ void A_LDM(ARM* cpu) } first = false; - lastreg = i; if (!preinc) base += 4; } } @@ -582,26 +498,12 @@ void A_LDM(ARM* cpu) { goto dataabort; } - cpu->AddCycles_CDI_LDM(); if (!preinc) base += 4; if (cpu->Num == 1) pc &= ~0x1; } - else - { - cpu->AddCycles_CDI_LDM(); - - if (cpu->Num == 0) - { - u32 lastbase = base; - if (!preinc) lastbase -= 4; - // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) - if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) - cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); - } - } // switch back to previous regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) @@ -635,8 +537,6 @@ void A_LDM(ARM* cpu) if (false) { dataabort: - cpu->AddCycles_CDI_LDM(); - // CHECKME: interlock shouldn't apply when it data aborts, right? // switch back to original set of regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) @@ -645,12 +545,14 @@ void A_LDM(ARM* cpu) // restore original value of base in case the reg got written to cpu->R[baseid] = oldbase; } + + cpu->AddCycles_CDI_LDM(); } void A_STM(ARM* cpu) { u32 baseid = (cpu->CurInstr >> 16) & 0xF; - u32 base = cpu->GetReg(baseid, 1); + u32 base = cpu->R[baseid]; u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; @@ -694,7 +596,7 @@ void A_STM(ARM* cpu) val = oldbase; else val = base; } - else val = cpu->GetReg(i, 1+cpu->DataCycles); + else val = cpu->R[i]; if (i == 15) val+=4; @@ -738,170 +640,160 @@ void A_STM(ARM* cpu) -void T_LDR_PCREL(ARM* cpu) // checkme: can pc be interlocked? +void T_LDR_PCREL(ARM* cpu) { u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count } -void T_STR_REG(ARM* cpu) +void T_STR_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); - cpu->DataWrite32(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_STRB_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); - cpu->DataWrite8(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_LDR_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; u32 val; if (cpu->DataRead32(addr, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, (addr & 3) ? 2 : 1, cpu->ILT_Norm); } void T_LDRB_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_STRH_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); - cpu->DataWrite16(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; + cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_LDRSB_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; if (cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_LDRH_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_LDRSH_REG(ARM* cpu) { - u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7); + u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; if (cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_STR_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->DataWrite32(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); - cpu->AddCycles_CD_STR(); + cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]); + cpu->AddCycles_CD_LDR(); } void T_LDR_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; u32 val; if (cpu->DataRead32(offset, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, (offset & 3) ? 2 : 1, cpu->ILT_Norm); } void T_STRB_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->DataWrite8(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_LDRB_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } void T_STRH_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - cpu->DataWrite16(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1)); + cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CD_STR(); } void T_LDRH_IMM(ARM* cpu) { u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7); + offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm); } -void T_STR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode? +void T_STR_SPREL(ARM* cpu) { u32 offset = (cpu->CurInstr << 2) & 0x3FC; offset += cpu->R[13]; - cpu->DataWrite32(offset, cpu->GetReg((cpu->CurInstr >> 8) & 0x7, 1)); + cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CD_STR(); } -void T_LDR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode? +void T_LDR_SPREL(ARM* cpu) { u32 offset = (cpu->CurInstr << 2) & 0x3FC; offset += cpu->R[13]; cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI_LDR(); - cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count } @@ -919,7 +811,7 @@ void T_PUSH(ARM* cpu) if (cpu->CurInstr & (1<<8)) nregs++; - u32 base = cpu->GetReg(13); + u32 base = cpu->R[13]; base -= (nregs<<2); u32 wbbase = base; @@ -927,8 +819,8 @@ void T_PUSH(ARM* cpu) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->GetReg(i, 1)) - : cpu->DataWrite32S(base, cpu->GetReg(i, 1)))) // verify interlock + if (!(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i]))) { goto dataabort; } @@ -952,11 +844,10 @@ void T_PUSH(ARM* cpu) cpu->AddCycles_CD_STM(); } -void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? +void T_POP(ARM* cpu) { u32 base = cpu->R[13]; bool first = true; - u32 lastreg = 0; for (int i = 0; i < 8; i++) { @@ -986,16 +877,6 @@ void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? } cpu->R[13] = base; - - cpu->AddCycles_CDI_LDM(); - if (cpu->Num == 0) - { - u32 lastbase = base - 4; - // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) - if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) - cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); - } - return; dataabort: cpu->AddCycles_CDI_LDM(); @@ -1003,15 +884,15 @@ void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode? void T_STMIA(ARM* cpu) { - u32 base = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->GetReg(i, 1)) - : cpu->DataWrite32S(base, cpu->GetReg(i, 1)))) + if (!(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i]))) { goto dataabort; } @@ -1028,9 +909,8 @@ void T_STMIA(ARM* cpu) void T_LDMIA(ARM* cpu) { - u32 base = cpu->GetReg((cpu->CurInstr >> 8) & 0x7); + u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; - u32 lastreg = 0; for (int i = 0; i < 8; i++) { @@ -1043,23 +923,11 @@ void T_LDMIA(ARM* cpu) } first = false; base += 4; - lastreg = i; } } if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; - - - cpu->AddCycles_CDI_LDM(); - if (cpu->Num == 0) - { - u32 lastbase = base - 4; - // no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?) - if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg)))) - cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); - } - return; dataabort: cpu->AddCycles_CDI_LDM(); From 4fcd52ed1682de76b96d9e486c1a4ee983f2f593 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 11 Jul 2024 20:19:25 -0400 Subject: [PATCH 056/115] someday i will learn to test things before pushing them --- src/ARMInterpreter_LoadStore.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index e2726005..3df9acdd 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -260,8 +260,8 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ - u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ + if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI_LDM(); return;} \ + u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI_LDM(); return;} \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI_LDM(); \ @@ -272,8 +272,8 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ - u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ + if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI_LDM(); return;} \ + u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI_LDM(); return;} \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI_LDM(); \ @@ -726,7 +726,7 @@ void T_STR_IMM(ARM* cpu) offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD_LDR(); + cpu->AddCycles_CD_STR(); } void T_LDR_IMM(ARM* cpu) From 789ef21c700774211467cba3261b1c3d88b0a159 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 12 Jul 2024 22:46:22 -0400 Subject: [PATCH 057/115] improve timings for S variants of multiply instructions on arm9 behavior seems to be a quirk of the way they made the interlock cycle mandatory --- src/ARM.h | 13 +++++- src/ARMInterpreter_ALU.cpp | 81 +++++++++++++++++++++++++++----------- 2 files changed, 70 insertions(+), 24 deletions(-) diff --git a/src/ARM.h b/src/ARM.h index dae5d96a..3bbc8735 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -272,12 +272,23 @@ public: void AddCycles_CI(s32 numI) override { - // code+internal + // code||internal s32 numC = CodeCycles; numI += 1; Cycles += std::max(numC, numI); } + void AddCycles_CIL(s32 numI, s32 numL) + { + // (code||internal)+forced interlock + // used by S variants of multiply instructions on the ARM9 + // seems that instead of adding extra hardware logic to allow for handling the memory stage of the instructions during the execute stage + // it instead seems to force a two cycle interlock allowing for the interlocked cycle to be executed without any special logic + presumably an extra cycle to set flags + s32 numC = CodeCycles; + numI += 1; + Cycles += std::max(numC, numI) + numL; + } + void AddCycles_CDI_LDR() override; void AddCycles_CDI_LDM() override; void AddCycles_CDI_SWP() override { AddCycles_CD_STR(); } // uses the same behavior as str diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index bc655996..e7b3ffb5 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -774,18 +774,23 @@ void A_MUL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + { + if (cpu->CurInstr & (1<<20)) + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + else + cpu->AddCycles_CI(1); + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3; else cycles = 4; - } - cpu->AddCycles_CI(cycles); + cpu->AddCycles_CI(cycles); + } } void A_MLA(ARM* cpu) @@ -804,18 +809,23 @@ void A_MLA(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + { + if (cpu->CurInstr & (1<<20)) + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + else + cpu->AddCycles_CI(1); + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; - } - cpu->AddCycles_CI(cycles); + cpu->AddCycles_CI(cycles); + } } void A_UMULL(ARM* cpu) @@ -834,18 +844,24 @@ void A_UMULL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + { + if (cpu->CurInstr & (1<<20)) + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + else + cpu->AddCycles_CI(1); + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + + cpu->AddCycles_CI(cycles); } - cpu->AddCycles_CI(cycles); } void A_UMLAL(ARM* cpu) @@ -867,18 +883,24 @@ void A_UMLAL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + { + if (cpu->CurInstr & (1<<20)) + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + else + cpu->AddCycles_CI(1); + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + + cpu->AddCycles_CI(cycles); } - cpu->AddCycles_CI(cycles); } void A_SMULL(ARM* cpu) @@ -897,18 +919,24 @@ void A_SMULL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + { + if (cpu->CurInstr & (1<<20)) + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + else + cpu->AddCycles_CI(1); + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + + cpu->AddCycles_CI(cycles); } - cpu->AddCycles_CI(cycles); } void A_SMLAL(ARM* cpu) @@ -930,18 +958,24 @@ void A_SMLAL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } - u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + { + if (cpu->CurInstr & (1<<20)) + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + else + cpu->AddCycles_CI(1); + } else { + u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + + cpu->AddCycles_CI(cycles); } - cpu->AddCycles_CI(cycles); } void A_SMLAxy(ARM* cpu) @@ -1461,20 +1495,21 @@ void T_MUL_REG(ARM* cpu) cpu->SetNZ(res & 0x80000000, !res); - s32 cycles = 0; if (cpu->Num == 0) { - cycles += 3; + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); // checkme? } else { + s32 cycles = 0; cpu->SetC(0); // carry flag destroyed, they say. whatever that means... if (a & 0xFF000000) cycles += 4; else if (a & 0x00FF0000) cycles += 3; else if (a & 0x0000FF00) cycles += 2; else cycles += 1; + + cpu->AddCycles_CI(cycles); } - cpu->AddCycles_CI(cycles); } void T_BIC_REG(ARM* cpu) From 764ee9ea1abf6aecebc96b253393f5b10a6a2381 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 13 Jul 2024 08:01:39 -0400 Subject: [PATCH 058/115] improve timings further --- src/ARM.h | 11 ++++++----- src/ARMInterpreter_ALU.cpp | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/ARM.h b/src/ARM.h index 3bbc8735..8ea553e6 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -278,12 +278,13 @@ public: Cycles += std::max(numC, numI); } - void AddCycles_CIL(s32 numI, s32 numL) + void AddCycles_CIF(s32 numI, s32 numL) { - // (code||internal)+forced interlock - // used by S variants of multiply instructions on the ARM9 - // seems that instead of adding extra hardware logic to allow for handling the memory stage of the instructions during the execute stage - // it instead seems to force a two cycle interlock allowing for the interlocked cycle to be executed without any special logic + presumably an extra cycle to set flags + // (code||internal)+forced + // used by certain multiply instructions + // seems likely that the execute stage occurs 2 cycles before the fetch stage ends....? + // could also be in some way related to interlock and the memory stage + // though that doesn't explain why some non-S variants trigger this s32 numC = CodeCycles; numI += 1; Cycles += std::max(numC, numI) + numL; diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index e7b3ffb5..00af1dac 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -777,7 +777,7 @@ void A_MUL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + ((ARMv5*)cpu)->AddCycles_CIF(1, 2); else cpu->AddCycles_CI(1); } @@ -812,7 +812,7 @@ void A_MLA(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + ((ARMv5*)cpu)->AddCycles_CIF(1, 2); else cpu->AddCycles_CI(1); } @@ -847,9 +847,9 @@ void A_UMULL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + ((ARMv5*)cpu)->AddCycles_CIF(1, 3); else - cpu->AddCycles_CI(1); + ((ARMv5*)cpu)->AddCycles_CIF(1, 1); } else { @@ -886,9 +886,9 @@ void A_UMLAL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + ((ARMv5*)cpu)->AddCycles_CIF(1, 3); else - cpu->AddCycles_CI(1); + ((ARMv5*)cpu)->AddCycles_CIF(1, 1); } else { @@ -922,9 +922,9 @@ void A_SMULL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + ((ARMv5*)cpu)->AddCycles_CIF(1, 3); else - cpu->AddCycles_CI(1); + ((ARMv5*)cpu)->AddCycles_CIF(1, 1); } else { @@ -961,9 +961,9 @@ void A_SMLAL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); + ((ARMv5*)cpu)->AddCycles_CIF(1, 3); else - cpu->AddCycles_CI(1); + ((ARMv5*)cpu)->AddCycles_CIF(1, 1); } else { @@ -1497,7 +1497,7 @@ void T_MUL_REG(ARM* cpu) if (cpu->Num == 0) { - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); // checkme? + ((ARMv5*)cpu)->AddCycles_CIF(1, 2); } else { From 36f4f2c5d3f360184756d1e2261a28d4a362b2cc Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:52:26 -0400 Subject: [PATCH 059/115] Revert "improve timings further" This reverts commit 764ee9ea1abf6aecebc96b253393f5b10a6a2381. --- src/ARM.h | 11 +++++------ src/ARMInterpreter_ALU.cpp | 22 +++++++++++----------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/ARM.h b/src/ARM.h index 8ea553e6..3bbc8735 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -278,13 +278,12 @@ public: Cycles += std::max(numC, numI); } - void AddCycles_CIF(s32 numI, s32 numL) + void AddCycles_CIL(s32 numI, s32 numL) { - // (code||internal)+forced - // used by certain multiply instructions - // seems likely that the execute stage occurs 2 cycles before the fetch stage ends....? - // could also be in some way related to interlock and the memory stage - // though that doesn't explain why some non-S variants trigger this + // (code||internal)+forced interlock + // used by S variants of multiply instructions on the ARM9 + // seems that instead of adding extra hardware logic to allow for handling the memory stage of the instructions during the execute stage + // it instead seems to force a two cycle interlock allowing for the interlocked cycle to be executed without any special logic + presumably an extra cycle to set flags s32 numC = CodeCycles; numI += 1; Cycles += std::max(numC, numI) + numL; diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 00af1dac..e7b3ffb5 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -777,7 +777,7 @@ void A_MUL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIF(1, 2); + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); else cpu->AddCycles_CI(1); } @@ -812,7 +812,7 @@ void A_MLA(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIF(1, 2); + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); else cpu->AddCycles_CI(1); } @@ -847,9 +847,9 @@ void A_UMULL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIF(1, 3); + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); else - ((ARMv5*)cpu)->AddCycles_CIF(1, 1); + cpu->AddCycles_CI(1); } else { @@ -886,9 +886,9 @@ void A_UMLAL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIF(1, 3); + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); else - ((ARMv5*)cpu)->AddCycles_CIF(1, 1); + cpu->AddCycles_CI(1); } else { @@ -922,9 +922,9 @@ void A_SMULL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIF(1, 3); + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); else - ((ARMv5*)cpu)->AddCycles_CIF(1, 1); + cpu->AddCycles_CI(1); } else { @@ -961,9 +961,9 @@ void A_SMLAL(ARM* cpu) if (cpu->Num == 0) { if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIF(1, 3); + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); else - ((ARMv5*)cpu)->AddCycles_CIF(1, 1); + cpu->AddCycles_CI(1); } else { @@ -1497,7 +1497,7 @@ void T_MUL_REG(ARM* cpu) if (cpu->Num == 0) { - ((ARMv5*)cpu)->AddCycles_CIF(1, 2); + ((ARMv5*)cpu)->AddCycles_CIL(1, 2); // checkme? } else { From 13578a3cc95ab6c77c913a5167213380ede402f4 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:52:28 -0400 Subject: [PATCH 060/115] Revert "improve timings for S variants of multiply instructions on arm9" This reverts commit 789ef21c700774211467cba3261b1c3d88b0a159. --- src/ARM.h | 13 +----- src/ARMInterpreter_ALU.cpp | 81 +++++++++++--------------------------- 2 files changed, 24 insertions(+), 70 deletions(-) diff --git a/src/ARM.h b/src/ARM.h index 3bbc8735..dae5d96a 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -272,23 +272,12 @@ public: void AddCycles_CI(s32 numI) override { - // code||internal + // code+internal s32 numC = CodeCycles; numI += 1; Cycles += std::max(numC, numI); } - void AddCycles_CIL(s32 numI, s32 numL) - { - // (code||internal)+forced interlock - // used by S variants of multiply instructions on the ARM9 - // seems that instead of adding extra hardware logic to allow for handling the memory stage of the instructions during the execute stage - // it instead seems to force a two cycle interlock allowing for the interlocked cycle to be executed without any special logic + presumably an extra cycle to set flags - s32 numC = CodeCycles; - numI += 1; - Cycles += std::max(numC, numI) + numL; - } - void AddCycles_CDI_LDR() override; void AddCycles_CDI_LDM() override; void AddCycles_CDI_SWP() override { AddCycles_CD_STR(); } // uses the same behavior as str diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index e7b3ffb5..bc655996 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -774,23 +774,18 @@ void A_MUL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } + u32 cycles; if (cpu->Num == 0) - { - if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); - else - cpu->AddCycles_CI(1); - } + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; else { - u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3; else cycles = 4; - - cpu->AddCycles_CI(cycles); } + + cpu->AddCycles_CI(cycles); } void A_MLA(ARM* cpu) @@ -809,23 +804,18 @@ void A_MLA(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } + u32 cycles; if (cpu->Num == 0) - { - if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); - else - cpu->AddCycles_CI(1); - } + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; else { - u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; - - cpu->AddCycles_CI(cycles); } + + cpu->AddCycles_CI(cycles); } void A_UMULL(ARM* cpu) @@ -844,24 +834,18 @@ void A_UMULL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } + u32 cycles; if (cpu->Num == 0) - { - if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); - else - cpu->AddCycles_CI(1); - } + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; else { - u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; - - cpu->AddCycles_CI(cycles); } + cpu->AddCycles_CI(cycles); } void A_UMLAL(ARM* cpu) @@ -883,24 +867,18 @@ void A_UMLAL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } + u32 cycles; if (cpu->Num == 0) - { - if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); - else - cpu->AddCycles_CI(1); - } + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; else { - u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; - - cpu->AddCycles_CI(cycles); } + cpu->AddCycles_CI(cycles); } void A_SMULL(ARM* cpu) @@ -919,24 +897,18 @@ void A_SMULL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } + u32 cycles; if (cpu->Num == 0) - { - if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); - else - cpu->AddCycles_CI(1); - } + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; else { - u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; - - cpu->AddCycles_CI(cycles); } + cpu->AddCycles_CI(cycles); } void A_SMLAL(ARM* cpu) @@ -958,24 +930,18 @@ void A_SMLAL(ARM* cpu) if (cpu->Num==1) cpu->SetC(0); } + u32 cycles; if (cpu->Num == 0) - { - if (cpu->CurInstr & (1<<20)) - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); - else - cpu->AddCycles_CI(1); - } + cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; else { - u32 cycles; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; - - cpu->AddCycles_CI(cycles); } + cpu->AddCycles_CI(cycles); } void A_SMLAxy(ARM* cpu) @@ -1495,21 +1461,20 @@ void T_MUL_REG(ARM* cpu) cpu->SetNZ(res & 0x80000000, !res); + s32 cycles = 0; if (cpu->Num == 0) { - ((ARMv5*)cpu)->AddCycles_CIL(1, 2); // checkme? + cycles += 3; } else { - s32 cycles = 0; cpu->SetC(0); // carry flag destroyed, they say. whatever that means... if (a & 0xFF000000) cycles += 4; else if (a & 0x00FF0000) cycles += 3; else if (a & 0x0000FF00) cycles += 2; else cycles += 1; - - cpu->AddCycles_CI(cycles); } + cpu->AddCycles_CI(cycles); } void T_BIC_REG(ARM* cpu) From 7cd50e7b56755f7def5aeb3d7a2f01037af25928 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:56:43 -0400 Subject: [PATCH 061/115] fix some multiply timings --- src/ARMInterpreter_ALU.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index bc655996..37c79904 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -836,7 +836,7 @@ void A_UMULL(ARM* cpu) u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; else { if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; @@ -869,7 +869,7 @@ void A_UMLAL(ARM* cpu) u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; else { if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; @@ -899,7 +899,7 @@ void A_SMULL(ARM* cpu) u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; else { if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; @@ -932,7 +932,7 @@ void A_SMLAL(ARM* cpu) u32 cycles; if (cpu->Num == 0) - cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1; + cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; else { if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; From 3c936d84b3b9821ba211f4c9fb5235c493260ad2 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 3 Aug 2024 16:20:50 -0400 Subject: [PATCH 062/115] improve mrs, mrc timings --- src/ARMInterpreter.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index f9623147..e4b23641 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -202,7 +202,9 @@ void A_MRS(ARM* cpu) psr = cpu->CPSR; cpu->R[(cpu->CurInstr>>12) & 0xF] = psr; - cpu->AddCycles_C(); + + if (cpu->Num != 1) cpu->AddCycles_CI(1); // arm9 + else cpu->AddCycles_C(); // arm7 } @@ -261,7 +263,8 @@ void A_MRC(ARM* cpu) return A_UNK(cpu); // TODO: check what kind of exception it really is } - cpu->AddCycles_CI(2 + 1); // TODO: checkme + if (cpu->Num != 1) cpu->AddCycles_CI(1); // checkme + else cpu->AddCycles_CI(2 + 1); // TODO: checkme } From 2e421e29e355f7e21e5419a5c0735044325676b6 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 27 Jul 2024 15:02:13 -0400 Subject: [PATCH 063/115] cache should be disabled when pu is disabled --- src/CP15.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/CP15.cpp b/src/CP15.cpp index d5898ac8..cba249fc 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -266,8 +266,6 @@ void ARMv5::UpdatePURegions(bool update_all) // PU disabled u8 mask = 0x07; - if (CP15Control & (1<<2)) mask |= 0x30; - if (CP15Control & (1<<12)) mask |= 0x40; memset(PU_UserMap, mask, 0x100000); memset(PU_PrivMap, mask, 0x100000); From 4b703d24b53b1cba9c7ea8324bf89208e72a8a0b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 22 Jul 2024 19:39:55 -0400 Subject: [PATCH 064/115] improve msr timings for arm9 --- src/ARMInterpreter.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index e4b23641..0122e082 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -121,7 +121,8 @@ void A_MSR_IMM(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - cpu->AddCycles_C(); + if ((cpu->Num != 1) && (cpu->CurInstr & (0x7<<16))) cpu->AddCycles_CI(2); + else cpu->AddCycles_C(); } void A_MSR_REG(ARM* cpu) @@ -174,7 +175,8 @@ void A_MSR_REG(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - cpu->AddCycles_C(); + if ((cpu->Num != 1) && (cpu->CurInstr & (0x7<<16))) cpu->AddCycles_CI(2); + else cpu->AddCycles_C(); } void A_MRS(ARM* cpu) From ab2a8f128f255895abd323336dea6d257ef42c22 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 4 Aug 2024 14:45:28 -0400 Subject: [PATCH 065/115] revert timing tweaks, finish thumb interwork code --- src/ARM.cpp | 126 +------------------------------ src/ARM.h | 45 +++++++---- src/ARMInterpreter_LoadStore.cpp | 106 +++++++++++++------------- src/CP15.cpp | 18 ----- 4 files changed, 83 insertions(+), 212 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index e1f93a58..16c53dc1 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -300,9 +300,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) u32 oldregion = R[15] >> 24; u32 newregion = addr >> 24; - - if (addr < ITCMSize) CodeRegion = Mem9_ITCM; - else CodeRegion = NDS.ARM9Regions[addr >> 14]; RegionCodeCycles = MemTimings[addr >> 12][0]; @@ -644,7 +641,7 @@ void ARMv5::Execute() R[15] += 2; CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; - if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 1; } + if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 0; } else NextInstr[1] = CodeRead32(R[15], false); // actually execute @@ -1256,127 +1253,6 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) } -void ARMv5::AddCycles_CD_STR() -{ - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; - - s32 early; - if (DataRegion == Mem9_ITCM) - { - early = (CodeRegion == Mem9_ITCM) ? 0 : 2; - } - else if (DataRegion == Mem9_DTCM) - { - early = 2; - } - else if (DataRegion == Mem9_MainRAM) - { - early = (CodeRegion == Mem9_MainRAM) ? 0 : 18; // CHECKME: how early can main ram be? - } - else early = (DataRegion == CodeRegion) ? 4 : 6; - - s32 code = numC - early; - if (code < 0) code = 0; - Cycles += std::max(code + numD, numC); -} - -void ARMv5::AddCycles_CD_STM() -{ - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; - - s32 early; - if (DataRegion == Mem9_ITCM) - { - early = (CodeRegion == Mem9_ITCM) ? -1 : 0; // stm adds either: no penalty or benefit to itcm loads, or a 1 cycle penalty if executing from itcm. - } - else if (DataRegion == Mem9_DTCM) - { - early = 2; - } - else if (DataRegion == Mem9_MainRAM) - { - early = (CodeRegion == Mem9_MainRAM) ? 0 : 18; // CHECKME: how early can main ram be? - } - else early = (DataRegion == CodeRegion) ? 4 : 6; - - s32 code = numC - early; - if (code < 0) code = 0; - Cycles += std::max(code + numD, numC); -} - -void ARMv5::AddCycles_CDI_LDR() -{ - // LDR cycles. ARM9 seems to skip the internal cycle here. - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; - - // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early - s32 early; - if (DataRegion == Mem9_ITCM) - { - early = (CodeRegion == Mem9_ITCM) ? 0 : 2; - } - else if (DataRegion == Mem9_DTCM) - { - early = 2; - } - else if (DataRegion == Mem9_MainRAM) - { - early = (CodeRegion == Mem9_MainRAM) ? 0 : 6; - } - else early = 6; - - s32 code = numC - early; - if (code < 0) code = 0; - Cycles += std::max(code + numD, numC); -} - -void ARMv5::AddCycles_CDI_LDM() -{ - // LDM cycles. ARM9 seems to skip the internal cycle here. - s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; - s32 numD = DataCycles; - - // if a 32 bit bus, start 2 cycles early; else, start 4 cycles early - s32 early; - switch (DataRegion) - { - case 0: // background region; - case Mem9_DTCM: - case Mem9_BIOS: - case Mem9_WRAM: - case Mem9_IO: - case Mem9_Pal: // CHECKME - default: - early = 2; - break; - - case Mem9_OAM: // CHECKME - case Mem9_GBAROM: - case Mem9_GBARAM: - early = 4; - break; - - case Mem9_MainRAM: - early = (CodeRegion == Mem9_MainRAM) ? 0 : 4; - break; - - case Mem9_VRAM: // the dsi can toggle the bus width of vram between 32 and 16 bit - early = (NDS.ConsoleType == 0 || !(((DSi&)NDS).SCFG_EXT[0] & (1<<13))) ? 4 : 2; - break; - - case Mem9_ITCM: // itcm data fetches cannot be done at the same time as a code fetch, it'll even incurr a 1 cycle penalty when executing from itcm - early = (CodeRegion == Mem9_ITCM) ? -1 : 0; - break; - } - - s32 code = numC - early; - if (code < 0) code = 0; - Cycles += std::max(code + numD, numC); -} - void ARMv4::AddCycles_C() { // code only. this code fetch is sequential. diff --git a/src/ARM.h b/src/ARM.h index dae5d96a..7558f7a3 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -266,23 +266,41 @@ public: void AddCycles_C() override { // code only. always nonseq 32-bit for ARM9. - s32 numC = CodeCycles; + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; Cycles += numC; } void AddCycles_CI(s32 numI) override { // code+internal - s32 numC = CodeCycles; - numI += 1; - Cycles += std::max(numC, numI); + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + Cycles += numC + numI; } - void AddCycles_CDI_LDR() override; - void AddCycles_CDI_LDM() override; - void AddCycles_CDI_SWP() override { AddCycles_CD_STR(); } // uses the same behavior as str - void AddCycles_CD_STR() override; - void AddCycles_CD_STM() override; + void AddCycles_CDI() override + { + // LDR/LDM cycles. ARM9 seems to skip the internal cycle there. + // TODO: ITCM data fetches shouldn't be parallelized, they say + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numD = DataCycles; + + //if (DataRegion != CodeRegion) + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); + //else + // Cycles += numC + numD; + } + + void AddCycles_CD() override + { + // TODO: ITCM data fetches shouldn't be parallelized, they say + s32 numC = (R[15] & 0x2) ? 0 : CodeCycles; + s32 numD = DataCycles; + + //if (DataRegion != CodeRegion) + Cycles += std::max(numC + numD - 6, std::max(numC, numD)); + //else + // Cycles += numC + numD; + } void GetCodeMemRegion(u32 addr, MemRegion* region); @@ -396,13 +414,8 @@ public: bool DataWrite32S(u32 addr, u32 val, bool dataabort = false) override; void AddCycles_C() override; void AddCycles_CI(s32 num) override; - void AddCycles_CDI(); - void AddCycles_CDI_LDR() override { AddCycles_CDI(); } - void AddCycles_CDI_LDM() override { AddCycles_CDI(); } - void AddCycles_CDI_SWP() override { AddCycles_CDI(); } // checkme? - void AddCycles_CD(); - void AddCycles_CD_STR() override { AddCycles_CD(); } - void AddCycles_CD_STM() override { AddCycles_CD(); } + void AddCycles_CDI() override; + void AddCycles_CD() override; protected: u8 BusRead8(u32 addr) override; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 3df9acdd..580c66fc 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -66,7 +66,7 @@ namespace melonDS::ARMInterpreter if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(offset, storeval); \ - cpu->AddCycles_CD_STR(); \ + cpu->AddCycles_CD(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -77,7 +77,7 @@ namespace melonDS::ARMInterpreter if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ storeval += 4; \ bool dataabort = !cpu->DataWrite32(addr, storeval); \ - cpu->AddCycles_CD_STR(); \ + cpu->AddCycles_CD(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -86,7 +86,7 @@ namespace melonDS::ARMInterpreter u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(offset, storeval); \ - cpu->AddCycles_CD_STR(); \ + cpu->AddCycles_CD(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -96,20 +96,20 @@ namespace melonDS::ARMInterpreter u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite8(addr, storeval); \ - cpu->AddCycles_CD_STR(); \ + cpu->AddCycles_CD(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDR \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead32(offset, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = ROR(val, ((offset&0x3)<<3)); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ - if (cpu->Num==1) val &= ~0x1; \ + if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1; \ cpu->JumpTo(val); \ } \ else \ @@ -121,13 +121,13 @@ namespace melonDS::ARMInterpreter #define A_LDR_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = ROR(val, ((addr&0x3)<<3)); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ if (((cpu->CurInstr>>12) & 0xF) == 15) \ { \ - if (cpu->Num==1) val &= ~0x1; \ + if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1; \ cpu->JumpTo(val); \ } \ else \ @@ -138,7 +138,7 @@ namespace melonDS::ARMInterpreter #define A_LDRB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ @@ -148,7 +148,7 @@ namespace melonDS::ARMInterpreter #define A_LDRB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ @@ -240,7 +240,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(offset, storeval); \ - cpu->AddCycles_CD_STR(); \ + cpu->AddCycles_CD(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -249,7 +249,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ bool dataabort = !cpu->DataWrite16(addr, storeval); \ - cpu->AddCycles_CD_STR(); \ + cpu->AddCycles_CD(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -260,11 +260,11 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI_LDM(); return;} \ - u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI_LDM(); return;} \ + if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ + u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ else cpu->R[r+1] = val; \ - cpu->AddCycles_CDI_LDM(); \ + cpu->AddCycles_CDI(); \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_LDRD_POST \ @@ -272,11 +272,11 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI_LDM(); return;} \ - u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI_LDM(); return;} \ + if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ + u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ else cpu->R[r+1] = val; \ - cpu->AddCycles_CDI_LDM(); \ + cpu->AddCycles_CDI(); \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_STRD \ @@ -287,7 +287,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) bool dataabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (offset+4, storeval, dataabort); /* no, i dont understand it either */ \ - cpu->AddCycles_CD_STM(); \ + cpu->AddCycles_CD(); \ if (dataabort) return; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -299,14 +299,14 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) bool dataabort = !cpu->DataWrite32(addr, cpu->R[r]); \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dataabort |= !cpu->DataWrite32S (addr+4, storeval, dataabort); \ - cpu->AddCycles_CD_STM(); \ + cpu->AddCycles_CD(); \ if (dataabort) return; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ @@ -315,7 +315,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ @@ -324,7 +324,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRSB \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ @@ -334,7 +334,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRSB_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s8)val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ @@ -344,7 +344,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRSH \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ @@ -354,7 +354,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_LDRSH_POST \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ - cpu->AddCycles_CDI_LDR(); \ + cpu->AddCycles_CDI(); \ if (dataabort) return; \ val = (s32)(s16)val; \ if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ @@ -415,7 +415,7 @@ void A_SWP(ARM* cpu) } cpu->DataCycles += numD; } - cpu->AddCycles_CDI_SWP(); + cpu->AddCycles_CDI(); } void A_SWPB(ARM* cpu) @@ -437,7 +437,7 @@ void A_SWPB(ARM* cpu) } cpu->DataCycles += numD; } - cpu->AddCycles_CDI_SWP(); + cpu->AddCycles_CDI(); } @@ -501,7 +501,7 @@ void A_LDM(ARM* cpu) if (!preinc) base += 4; - if (cpu->Num == 1) + if (cpu->Num == 1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc &= ~0x1; } @@ -546,7 +546,7 @@ void A_LDM(ARM* cpu) cpu->R[baseid] = oldbase; } - cpu->AddCycles_CDI_LDM(); + cpu->AddCycles_CDI(); } void A_STM(ARM* cpu) @@ -630,7 +630,7 @@ void A_STM(ARM* cpu) cpu->R[baseid] = oldbase; } - cpu->AddCycles_CD_STM(); + cpu->AddCycles_CD(); } @@ -645,7 +645,7 @@ void T_LDR_PCREL(ARM* cpu) u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } @@ -654,7 +654,7 @@ void T_STR_REG(ARM* cpu) u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD_STR(); + cpu->AddCycles_CD(); } void T_STRB_REG(ARM* cpu) @@ -662,7 +662,7 @@ void T_STRB_REG(ARM* cpu) u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD_STR(); + cpu->AddCycles_CD(); } void T_LDR_REG(ARM* cpu) @@ -673,7 +673,7 @@ void T_LDR_REG(ARM* cpu) if (cpu->DataRead32(addr, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } void T_LDRB_REG(ARM* cpu) @@ -681,7 +681,7 @@ void T_LDRB_REG(ARM* cpu) u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } @@ -690,7 +690,7 @@ void T_STRH_REG(ARM* cpu) u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD_STR(); + cpu->AddCycles_CD(); } void T_LDRSB_REG(ARM* cpu) @@ -699,7 +699,7 @@ void T_LDRSB_REG(ARM* cpu) if (cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } void T_LDRH_REG(ARM* cpu) @@ -707,7 +707,7 @@ void T_LDRH_REG(ARM* cpu) u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } void T_LDRSH_REG(ARM* cpu) @@ -716,7 +716,7 @@ void T_LDRSH_REG(ARM* cpu) if (cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7])) cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } @@ -726,7 +726,7 @@ void T_STR_IMM(ARM* cpu) offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD_STR(); + cpu->AddCycles_CD(); } void T_LDR_IMM(ARM* cpu) @@ -737,7 +737,7 @@ void T_LDR_IMM(ARM* cpu) u32 val; if (cpu->DataRead32(offset, &val)) cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } void T_STRB_IMM(ARM* cpu) @@ -746,7 +746,7 @@ void T_STRB_IMM(ARM* cpu) offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD_STR(); + cpu->AddCycles_CD(); } void T_LDRB_IMM(ARM* cpu) @@ -755,7 +755,7 @@ void T_LDRB_IMM(ARM* cpu) offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } @@ -765,7 +765,7 @@ void T_STRH_IMM(ARM* cpu) offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD_STR(); + cpu->AddCycles_CD(); } void T_LDRH_IMM(ARM* cpu) @@ -774,7 +774,7 @@ void T_LDRH_IMM(ARM* cpu) offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } @@ -784,7 +784,7 @@ void T_STR_SPREL(ARM* cpu) offset += cpu->R[13]; cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7]); - cpu->AddCycles_CD_STR(); + cpu->AddCycles_CD(); } void T_LDR_SPREL(ARM* cpu) @@ -793,7 +793,7 @@ void T_LDR_SPREL(ARM* cpu) offset += cpu->R[13]; cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); - cpu->AddCycles_CDI_LDR(); + cpu->AddCycles_CDI(); } @@ -841,7 +841,7 @@ void T_PUSH(ARM* cpu) cpu->R[13] = wbbase; dataabort: - cpu->AddCycles_CD_STM(); + cpu->AddCycles_CD(); } void T_POP(ARM* cpu) @@ -871,7 +871,7 @@ void T_POP(ARM* cpu) { goto dataabort; } - if (cpu->Num==1) pc |= 0x1; + if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1; cpu->JumpTo(pc); base += 4; } @@ -879,7 +879,7 @@ void T_POP(ARM* cpu) cpu->R[13] = base; dataabort: - cpu->AddCycles_CDI_LDM(); + cpu->AddCycles_CDI(); } void T_STMIA(ARM* cpu) @@ -904,7 +904,7 @@ void T_STMIA(ARM* cpu) // TODO: check "Rb included in Rlist" case cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; dataabort: - cpu->AddCycles_CD_STM(); + cpu->AddCycles_CD(); } void T_LDMIA(ARM* cpu) @@ -930,7 +930,7 @@ void T_LDMIA(ARM* cpu) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; dataabort: - cpu->AddCycles_CDI_LDM(); + cpu->AddCycles_CDI(); } diff --git a/src/CP15.cpp b/src/CP15.cpp index cba249fc..bf1d2edc 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -815,21 +815,18 @@ bool ARMv5::DataRead8(u32 addr, u32* val) if (addr < ITCMSize) { - DataRegion = Mem9_ITCM; DataCycles = 1; *val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)]; return true; } if ((addr & DTCMMask) == DTCMBase) { - DataRegion = Mem9_DTCM; DataCycles = 1; *val = *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)]; return true; } *val = BusRead8(addr); - DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][1]; return true; } @@ -846,21 +843,18 @@ bool ARMv5::DataRead16(u32 addr, u32* val) if (addr < ITCMSize) { - DataRegion = Mem9_ITCM; DataCycles = 1; *val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)]; return true; } if ((addr & DTCMMask) == DTCMBase) { - DataRegion = Mem9_DTCM; DataCycles = 1; *val = *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)]; return true; } *val = BusRead16(addr); - DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][1]; return true; } @@ -877,21 +871,18 @@ bool ARMv5::DataRead32(u32 addr, u32* val) if (addr < ITCMSize) { - DataRegion = Mem9_ITCM; DataCycles = 1; *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; return true; } if ((addr & DTCMMask) == DTCMBase) { - DataRegion = Mem9_DTCM; DataCycles = 1; *val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)]; return true; } *val = BusRead32(addr); - DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][2]; return true; } @@ -934,7 +925,6 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) if (addr < ITCMSize) { - DataRegion = Mem9_ITCM; DataCycles = 1; *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); @@ -942,14 +932,12 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) } if ((addr & DTCMMask) == DTCMBase) { - DataRegion = Mem9_DTCM; DataCycles = 1; *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; return true; } BusWrite8(addr, val); - DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][1]; return true; } @@ -966,7 +954,6 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) if (addr < ITCMSize) { - DataRegion = Mem9_ITCM; DataCycles = 1; *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); @@ -974,14 +961,12 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) } if ((addr & DTCMMask) == DTCMBase) { - DataRegion = Mem9_DTCM; DataCycles = 1; *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; return true; } BusWrite16(addr, val); - DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][1]; return true; } @@ -998,7 +983,6 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) if (addr < ITCMSize) { - DataRegion = Mem9_ITCM; DataCycles = 1; *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val; NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); @@ -1006,14 +990,12 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) } if ((addr & DTCMMask) == DTCMBase) { - DataRegion = Mem9_DTCM; DataCycles = 1; *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; return true; } BusWrite32(addr, val); - DataRegion = NDS.ARM9Regions[addr >> 14]; DataCycles = MemTimings[addr >> 12][2]; return true; } From 346ac1380f043c62afc42c0aff5c67b7c56be47b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 4 Aug 2024 15:21:23 -0400 Subject: [PATCH 066/115] forgot to remove a thingy when removing timing reworks --- src/ARM.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/ARM.h b/src/ARM.h index 7558f7a3..b41389e1 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -140,11 +140,8 @@ public: virtual void AddCycles_C() = 0; virtual void AddCycles_CI(s32 numI) = 0; - virtual void AddCycles_CDI_LDR() = 0; - virtual void AddCycles_CDI_LDM() = 0; - virtual void AddCycles_CDI_SWP() = 0; - virtual void AddCycles_CD_STR() = 0; - virtual void AddCycles_CD_STM() = 0; + virtual void AddCycles_CDI() = 0; + virtual void AddCycles_CD() = 0; void CheckGdbIncoming(); From 587958e6781cf4d44cd9c611f2589e7d3fc36e5d Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 4 Aug 2024 23:31:20 -0400 Subject: [PATCH 067/115] Improve accuracy of prefetch aborts comes with a small-ish performance hit --- src/ARM.cpp | 38 ++++++++++++++++++-------------------- src/CP15.cpp | 29 ++++++++++++++--------------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 6ac387b2..ae55514a 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -343,12 +343,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) CPSR &= ~0x20; } - if (!(PU_Map[addr>>12] & 0x04)) - { - PrefetchAbort(); - return; - } - NDS.MonitorARM9Jump(addr); } @@ -575,15 +569,6 @@ void ARMv5::PrefetchAbort() CPSR |= 0x97; UpdateMode(oldcpsr, CPSR); - // this shouldn't happen, but if it does, we're stuck in some nasty endless loop - // so better take care of it - if (!(PU_Map[ExceptionBase>>12] & 0x04)) - { - Log(LogLevel::Error, "!!!!! EXCEPTION REGION NOT EXECUTABLE. THIS IS VERY BAD!!\n"); - NDS.Stop(Platform::StopReason::BadExceptionRegion); - return; - } - R_ABT[2] = oldcpsr; R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); JumpTo(ExceptionBase + 0x0C); @@ -685,10 +670,18 @@ void ARMv5::Execute() NextInstr[0] = NextInstr[1]; if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 0; } else NextInstr[1] = CodeRead32(R[15], false); - + + // handle aborted instructions + if (!(PU_Map[(R[15]-4)>>12] & 0x04)) [[unlikely]] + { + PrefetchAbort(); + } // actually execute - u32 icode = (CurInstr >> 6) & 0x3FF; - ARMInterpreter::THUMBInstrTable[icode](this); + else [[likely]] + { + u32 icode = (CurInstr >> 6) & 0x3FF; + ARMInterpreter::THUMBInstrTable[icode](this); + } } else { @@ -700,9 +693,14 @@ void ARMv5::Execute() CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead32(R[15], false); - + + // handle aborted instructions + if (!(PU_Map[(R[15]-8)>>12] & 0x04)) [[unlikely]] // todo: check for bkpt instruction? + { + PrefetchAbort(); + } // actually execute - if (CheckCondition(CurInstr >> 28)) + else if (CheckCondition(CurInstr >> 28)) [[likely]] { u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); ARMInterpreter::ARMInstrTable[icode](this); diff --git a/src/CP15.cpp b/src/CP15.cpp index bf1d2edc..6fcaff93 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -773,14 +773,13 @@ u32 ARMv5::CP15Read(u32 id) const u32 ARMv5::CodeRead32(u32 addr, bool branch) { - /*if (branch || (!(addr & 0xFFF))) + // prefetch abort + // the actual exception is not raised until the aborted instruction is executed + if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]] { - if (!(PU_Map[addr>>12] & 0x04)) - { - PrefetchAbort(); - return 0; - } - }*/ + CodeCycles = 1; + return 0; + } if (addr < ITCMSize) { @@ -807,7 +806,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) bool ARMv5::DataRead8(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { DataAbort(); return false; @@ -833,7 +832,7 @@ bool ARMv5::DataRead8(u32 addr, u32* val) bool ARMv5::DataRead16(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { DataAbort(); return false; @@ -861,7 +860,7 @@ bool ARMv5::DataRead16(u32 addr, u32* val) bool ARMv5::DataRead32(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { DataAbort(); return false; @@ -889,7 +888,7 @@ bool ARMv5::DataRead32(u32 addr, u32* val) bool ARMv5::DataRead32S(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { DataAbort(); return false; @@ -917,7 +916,7 @@ bool ARMv5::DataRead32S(u32 addr, u32* val) bool ARMv5::DataWrite8(u32 addr, u8 val) { - if (!(PU_Map[addr>>12] & 0x02)) + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { DataAbort(); return false; @@ -944,7 +943,7 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) bool ARMv5::DataWrite16(u32 addr, u16 val) { - if (!(PU_Map[addr>>12] & 0x02)) + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { DataAbort(); return false; @@ -973,7 +972,7 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) bool ARMv5::DataWrite32(u32 addr, u32 val) { - if (!(PU_Map[addr>>12] & 0x02)) + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { DataAbort(); return false; @@ -1002,7 +1001,7 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) bool ARMv5::DataWrite32S(u32 addr, u32 val, bool dataabort) { - if (!(PU_Map[addr>>12] & 0x02)) + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { if (!dataabort) DataAbort(); return false; From 0dc619d6155b0f6533ff35d13cf5f00add4b1939 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 5 Aug 2024 11:41:25 -0400 Subject: [PATCH 068/115] Revert "Improve accuracy of prefetch aborts" This reverts commit 587958e6781cf4d44cd9c611f2589e7d3fc36e5d. --- src/ARM.cpp | 38 ++++++++++++++++++++------------------ src/CP15.cpp | 29 +++++++++++++++-------------- 2 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index ae55514a..6ac387b2 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -343,6 +343,12 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) CPSR &= ~0x20; } + if (!(PU_Map[addr>>12] & 0x04)) + { + PrefetchAbort(); + return; + } + NDS.MonitorARM9Jump(addr); } @@ -569,6 +575,15 @@ void ARMv5::PrefetchAbort() CPSR |= 0x97; UpdateMode(oldcpsr, CPSR); + // this shouldn't happen, but if it does, we're stuck in some nasty endless loop + // so better take care of it + if (!(PU_Map[ExceptionBase>>12] & 0x04)) + { + Log(LogLevel::Error, "!!!!! EXCEPTION REGION NOT EXECUTABLE. THIS IS VERY BAD!!\n"); + NDS.Stop(Platform::StopReason::BadExceptionRegion); + return; + } + R_ABT[2] = oldcpsr; R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); JumpTo(ExceptionBase + 0x0C); @@ -670,18 +685,10 @@ void ARMv5::Execute() NextInstr[0] = NextInstr[1]; if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 0; } else NextInstr[1] = CodeRead32(R[15], false); - - // handle aborted instructions - if (!(PU_Map[(R[15]-4)>>12] & 0x04)) [[unlikely]] - { - PrefetchAbort(); - } + // actually execute - else [[likely]] - { - u32 icode = (CurInstr >> 6) & 0x3FF; - ARMInterpreter::THUMBInstrTable[icode](this); - } + u32 icode = (CurInstr >> 6) & 0x3FF; + ARMInterpreter::THUMBInstrTable[icode](this); } else { @@ -693,14 +700,9 @@ void ARMv5::Execute() CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead32(R[15], false); - - // handle aborted instructions - if (!(PU_Map[(R[15]-8)>>12] & 0x04)) [[unlikely]] // todo: check for bkpt instruction? - { - PrefetchAbort(); - } + // actually execute - else if (CheckCondition(CurInstr >> 28)) [[likely]] + if (CheckCondition(CurInstr >> 28)) { u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); ARMInterpreter::ARMInstrTable[icode](this); diff --git a/src/CP15.cpp b/src/CP15.cpp index 6fcaff93..bf1d2edc 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -773,13 +773,14 @@ u32 ARMv5::CP15Read(u32 id) const u32 ARMv5::CodeRead32(u32 addr, bool branch) { - // prefetch abort - // the actual exception is not raised until the aborted instruction is executed - if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]] + /*if (branch || (!(addr & 0xFFF))) { - CodeCycles = 1; - return 0; - } + if (!(PU_Map[addr>>12] & 0x04)) + { + PrefetchAbort(); + return 0; + } + }*/ if (addr < ITCMSize) { @@ -806,7 +807,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) bool ARMv5::DataRead8(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] + if (!(PU_Map[addr>>12] & 0x01)) { DataAbort(); return false; @@ -832,7 +833,7 @@ bool ARMv5::DataRead8(u32 addr, u32* val) bool ARMv5::DataRead16(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] + if (!(PU_Map[addr>>12] & 0x01)) { DataAbort(); return false; @@ -860,7 +861,7 @@ bool ARMv5::DataRead16(u32 addr, u32* val) bool ARMv5::DataRead32(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] + if (!(PU_Map[addr>>12] & 0x01)) { DataAbort(); return false; @@ -888,7 +889,7 @@ bool ARMv5::DataRead32(u32 addr, u32* val) bool ARMv5::DataRead32S(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] + if (!(PU_Map[addr>>12] & 0x01)) { DataAbort(); return false; @@ -916,7 +917,7 @@ bool ARMv5::DataRead32S(u32 addr, u32* val) bool ARMv5::DataWrite8(u32 addr, u8 val) { - if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] + if (!(PU_Map[addr>>12] & 0x02)) { DataAbort(); return false; @@ -943,7 +944,7 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) bool ARMv5::DataWrite16(u32 addr, u16 val) { - if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] + if (!(PU_Map[addr>>12] & 0x02)) { DataAbort(); return false; @@ -972,7 +973,7 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) bool ARMv5::DataWrite32(u32 addr, u32 val) { - if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] + if (!(PU_Map[addr>>12] & 0x02)) { DataAbort(); return false; @@ -1001,7 +1002,7 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) bool ARMv5::DataWrite32S(u32 addr, u32 val, bool dataabort) { - if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] + if (!(PU_Map[addr>>12] & 0x02)) { if (!dataabort) DataAbort(); return false; From eedd2806f9d6c7505130db9fe57d97ce7415e2ba Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 5 Aug 2024 12:37:42 -0400 Subject: [PATCH 069/115] Reapply "Improve accuracy of prefetch aborts" This reverts commit 0dc619d6155b0f6533ff35d13cf5f00add4b1939. --- src/ARM.cpp | 38 ++++++++++++++++++-------------------- src/CP15.cpp | 29 ++++++++++++++--------------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 6ac387b2..ae55514a 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -343,12 +343,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) CPSR &= ~0x20; } - if (!(PU_Map[addr>>12] & 0x04)) - { - PrefetchAbort(); - return; - } - NDS.MonitorARM9Jump(addr); } @@ -575,15 +569,6 @@ void ARMv5::PrefetchAbort() CPSR |= 0x97; UpdateMode(oldcpsr, CPSR); - // this shouldn't happen, but if it does, we're stuck in some nasty endless loop - // so better take care of it - if (!(PU_Map[ExceptionBase>>12] & 0x04)) - { - Log(LogLevel::Error, "!!!!! EXCEPTION REGION NOT EXECUTABLE. THIS IS VERY BAD!!\n"); - NDS.Stop(Platform::StopReason::BadExceptionRegion); - return; - } - R_ABT[2] = oldcpsr; R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); JumpTo(ExceptionBase + 0x0C); @@ -685,10 +670,18 @@ void ARMv5::Execute() NextInstr[0] = NextInstr[1]; if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 0; } else NextInstr[1] = CodeRead32(R[15], false); - + + // handle aborted instructions + if (!(PU_Map[(R[15]-4)>>12] & 0x04)) [[unlikely]] + { + PrefetchAbort(); + } // actually execute - u32 icode = (CurInstr >> 6) & 0x3FF; - ARMInterpreter::THUMBInstrTable[icode](this); + else [[likely]] + { + u32 icode = (CurInstr >> 6) & 0x3FF; + ARMInterpreter::THUMBInstrTable[icode](this); + } } else { @@ -700,9 +693,14 @@ void ARMv5::Execute() CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead32(R[15], false); - + + // handle aborted instructions + if (!(PU_Map[(R[15]-8)>>12] & 0x04)) [[unlikely]] // todo: check for bkpt instruction? + { + PrefetchAbort(); + } // actually execute - if (CheckCondition(CurInstr >> 28)) + else if (CheckCondition(CurInstr >> 28)) [[likely]] { u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); ARMInterpreter::ARMInstrTable[icode](this); diff --git a/src/CP15.cpp b/src/CP15.cpp index bf1d2edc..6fcaff93 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -773,14 +773,13 @@ u32 ARMv5::CP15Read(u32 id) const u32 ARMv5::CodeRead32(u32 addr, bool branch) { - /*if (branch || (!(addr & 0xFFF))) + // prefetch abort + // the actual exception is not raised until the aborted instruction is executed + if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]] { - if (!(PU_Map[addr>>12] & 0x04)) - { - PrefetchAbort(); - return 0; - } - }*/ + CodeCycles = 1; + return 0; + } if (addr < ITCMSize) { @@ -807,7 +806,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) bool ARMv5::DataRead8(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { DataAbort(); return false; @@ -833,7 +832,7 @@ bool ARMv5::DataRead8(u32 addr, u32* val) bool ARMv5::DataRead16(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { DataAbort(); return false; @@ -861,7 +860,7 @@ bool ARMv5::DataRead16(u32 addr, u32* val) bool ARMv5::DataRead32(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { DataAbort(); return false; @@ -889,7 +888,7 @@ bool ARMv5::DataRead32(u32 addr, u32* val) bool ARMv5::DataRead32S(u32 addr, u32* val) { - if (!(PU_Map[addr>>12] & 0x01)) + if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { DataAbort(); return false; @@ -917,7 +916,7 @@ bool ARMv5::DataRead32S(u32 addr, u32* val) bool ARMv5::DataWrite8(u32 addr, u8 val) { - if (!(PU_Map[addr>>12] & 0x02)) + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { DataAbort(); return false; @@ -944,7 +943,7 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) bool ARMv5::DataWrite16(u32 addr, u16 val) { - if (!(PU_Map[addr>>12] & 0x02)) + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { DataAbort(); return false; @@ -973,7 +972,7 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) bool ARMv5::DataWrite32(u32 addr, u32 val) { - if (!(PU_Map[addr>>12] & 0x02)) + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { DataAbort(); return false; @@ -1002,7 +1001,7 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) bool ARMv5::DataWrite32S(u32 addr, u32 val, bool dataabort) { - if (!(PU_Map[addr>>12] & 0x02)) + if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { if (!dataabort) DataAbort(); return false; From a85b2bfb5647c4e228ea8683ca0481b6a69c2619 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 5 Aug 2024 14:57:17 -0400 Subject: [PATCH 070/115] tweak when irqs are triggered and fix prefetch aborts also ig add some comments next to the svc funcs so that someone searching for "swi" can find them easier --- src/ARM.cpp | 41 +++++++++++++++++++++-------------------- src/ARMInterpreter.cpp | 4 ++-- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index ae55514a..e01e0e36 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -548,7 +548,7 @@ void ARM::TriggerIRQ() UpdateMode(oldcpsr, CPSR); R_IRQ[2] = oldcpsr; - R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); + R[14] = R[15] - (oldcpsr & 0x20 ? 0 : 4); JumpTo(ExceptionBase + 0x18); // ARDS cheat support @@ -570,7 +570,7 @@ void ARMv5::PrefetchAbort() UpdateMode(oldcpsr, CPSR); R_ABT[2] = oldcpsr; - R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); + R[14] = R[15] - (oldcpsr & 0x20 ? 0 : 4); JumpTo(ExceptionBase + 0x0C); } @@ -609,7 +609,7 @@ void ARMv5::Execute() { Halted = 0; if (NDS.IME[0] & 0x1) - TriggerIRQ(); + IRQ = 1; } else { @@ -671,13 +671,13 @@ void ARMv5::Execute() if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 0; } else NextInstr[1] = CodeRead32(R[15], false); - // handle aborted instructions - if (!(PU_Map[(R[15]-4)>>12] & 0x04)) [[unlikely]] + + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else if (!(PU_Map[(R[15]-4)>>12] & 0x04)) [[unlikely]] // handle aborted instructions { PrefetchAbort(); } - // actually execute - else [[likely]] + else [[likely]] // actually execute { u32 icode = (CurInstr >> 6) & 0x3FF; ARMInterpreter::THUMBInstrTable[icode](this); @@ -694,13 +694,13 @@ void ARMv5::Execute() NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead32(R[15], false); - // handle aborted instructions - if (!(PU_Map[(R[15]-8)>>12] & 0x04)) [[unlikely]] // todo: check for bkpt instruction? + + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else if (!(PU_Map[(R[15]-8)>>12] & 0x04)) [[unlikely]] // handle aborted instructions { PrefetchAbort(); } - // actually execute - else if (CheckCondition(CurInstr >> 28)) [[likely]] + else if (CheckCondition(CurInstr >> 28)) [[likely]] // actually execute { u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); ARMInterpreter::ARMInstrTable[icode](this); @@ -727,8 +727,6 @@ void ARMv5::Execute() if (NDS::IME[0] & 0x1) TriggerIRQ(); }*/ - if (IRQ) TriggerIRQ(); - } NDS.ARM9Timestamp += Cycles; @@ -760,7 +758,7 @@ void ARMv4::Execute() { Halted = 0; if (NDS.IME[1] & 0x1) - TriggerIRQ(); + IRQ = 1; } else { @@ -820,9 +818,13 @@ void ARMv4::Execute() NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead16(R[15]); - // actually execute - u32 icode = (CurInstr >> 6); - ARMInterpreter::THUMBInstrTable[icode](this); + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else + { + // actually execute + u32 icode = (CurInstr >> 6); + ARMInterpreter::THUMBInstrTable[icode](this); + } } else { @@ -835,8 +837,8 @@ void ARMv4::Execute() NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead32(R[15]); - // actually execute - if (CheckCondition(CurInstr >> 28)) + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else if (CheckCondition(CurInstr >> 28)) // actually execute { u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); ARMInterpreter::ARMInstrTable[icode](this); @@ -859,7 +861,6 @@ void ARMv4::Execute() if (NDS::IME[1] & 0x1) TriggerIRQ(); }*/ - if (IRQ) TriggerIRQ(); } NDS.ARM7Timestamp += Cycles; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 8ca85976..6e6c9a8d 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -271,7 +271,7 @@ void A_MRC(ARM* cpu) -void A_SVC(ARM* cpu) +void A_SVC(ARM* cpu) // A_SWI { u32 oldcpsr = cpu->CPSR; cpu->CPSR &= ~0xBF; @@ -283,7 +283,7 @@ void A_SVC(ARM* cpu) cpu->JumpTo(cpu->ExceptionBase + 0x08); } -void T_SVC(ARM* cpu) +void T_SVC(ARM* cpu) // T_SWI { u32 oldcpsr = cpu->CPSR; cpu->CPSR &= ~0xBF; From 332a39dbafde51d6b703c78635246aaa098f337b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 5 Aug 2024 16:14:17 -0400 Subject: [PATCH 071/115] fix JIT being borked --- src/ARM.cpp | 37 ++++++++++++++++++++++++++----------- src/ARM.h | 1 + 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index e01e0e36..9919cbcb 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -537,6 +537,7 @@ void ARM::UpdateMode(u32 oldmode, u32 newmode, bool phony) } } +template void ARM::TriggerIRQ() { if (CPSR & 0x80) @@ -548,7 +549,10 @@ void ARM::TriggerIRQ() UpdateMode(oldcpsr, CPSR); R_IRQ[2] = oldcpsr; - R[14] = R[15] - (oldcpsr & 0x20 ? 0 : 4); + if constexpr (mode == CPUExecuteMode::JIT) + R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); + else + R[14] = R[15] - (oldcpsr & 0x20 ? 0 : 4); JumpTo(ExceptionBase + 0x18); // ARDS cheat support @@ -559,6 +563,11 @@ void ARM::TriggerIRQ() NDS.AREngine.RunCheats(); } } +template void ARM::TriggerIRQ(); +template void ARM::TriggerIRQ(); +#ifdef JIT_ENABLED +template void ARM::TriggerIRQ(); +#endif void ARMv5::PrefetchAbort() { @@ -609,7 +618,10 @@ void ARMv5::Execute() { Halted = 0; if (NDS.IME[0] & 0x1) - IRQ = 1; + { + if constexpr (mode == CPUExecuteMode::JIT) TriggerIRQ(); + else IRQ = 1; + } } else { @@ -643,7 +655,7 @@ void ARMv5::Execute() { // this order is crucial otherwise idle loops waiting for an IRQ won't function if (IRQ) - TriggerIRQ(); + TriggerIRQ(); if (Halted || IdleLoop) { @@ -672,7 +684,7 @@ void ARMv5::Execute() else NextInstr[1] = CodeRead32(R[15], false); - if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); else if (!(PU_Map[(R[15]-4)>>12] & 0x04)) [[unlikely]] // handle aborted instructions { PrefetchAbort(); @@ -695,7 +707,7 @@ void ARMv5::Execute() NextInstr[1] = CodeRead32(R[15], false); - if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); else if (!(PU_Map[(R[15]-8)>>12] & 0x04)) [[unlikely]] // handle aborted instructions { PrefetchAbort(); @@ -725,7 +737,7 @@ void ARMv5::Execute() /*if (NDS::IF[0] & NDS::IE[0]) { if (NDS::IME[0] & 0x1) - TriggerIRQ(); + TriggerIRQ(); }*/ } @@ -758,7 +770,10 @@ void ARMv4::Execute() { Halted = 0; if (NDS.IME[1] & 0x1) - IRQ = 1; + { + if constexpr (mode == CPUExecuteMode::JIT) TriggerIRQ(); + else IRQ = 1; + } } else { @@ -791,7 +806,7 @@ void ARMv4::Execute() if (StopExecution) { if (IRQ) - TriggerIRQ(); + TriggerIRQ(); if (Halted || IdleLoop) { @@ -818,7 +833,7 @@ void ARMv4::Execute() NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead16(R[15]); - if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); else { // actually execute @@ -837,7 +852,7 @@ void ARMv4::Execute() NextInstr[0] = NextInstr[1]; NextInstr[1] = CodeRead32(R[15]); - if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); else if (CheckCondition(CurInstr >> 28)) // actually execute { u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); @@ -859,7 +874,7 @@ void ARMv4::Execute() /*if (NDS::IF[1] & NDS::IE[1]) { if (NDS::IME[1] & 0x1) - TriggerIRQ(); + TriggerIRQ(); }*/ } diff --git a/src/ARM.h b/src/ARM.h index d4d3f5d4..2603e646 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -129,6 +129,7 @@ public: void UpdateMode(u32 oldmode, u32 newmode, bool phony = false); + template void TriggerIRQ(); void SetupCodeMem(u32 addr); From 40e8e8e7bd9574a37d794a5da1a76664c2f22f35 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 23 Aug 2024 19:13:17 -0400 Subject: [PATCH 072/115] rework single load/stores to use a shared instruction --- src/ARMInterpreter_LoadStore.cpp | 314 ++++++++++--------------------- 1 file changed, 101 insertions(+), 213 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 580c66fc..734b57d0 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -58,101 +58,100 @@ namespace melonDS::ARMInterpreter shiftop(offset, shift); \ if (!(cpu->CurInstr & (1<<23))) offset = -offset; +enum class Writeback +{ + None = 0, + Pre, + Post, +}; + +template +void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) +{ + static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); + + u32 addr; + if constexpr (writeback != Writeback::Post) addr = offset + cpu->R[rn]; + else addr = cpu->R[rn]; + + u32 val; + bool dataabort; + if constexpr (size == 8) dataabort = !cpu->DataRead8 (addr, &val); + if constexpr (size == 16) dataabort = !cpu->DataRead16(addr, &val); + if constexpr (size == 32) dataabort = !cpu->DataRead32(addr, &val); + + cpu->AddCycles_CDI(); + if (dataabort) return; + + if constexpr (size == 8 && signror) val = (s32)(s8)val; + if constexpr (size == 16 && signror) val = (s32)(s16)val; + if constexpr (size == 32 && signror) val = ROR(val, ((addr&0x3)<<3)); + + if constexpr (writeback != Writeback::None) cpu->R[rn] += offset; + + if (rd == 15) + { + if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1; + cpu->JumpTo(val); + } + else cpu->R[rd] = val; +} + +template +void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) +{ + static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); + + u32 addr; + if constexpr (writeback != Writeback::Post) addr = offset + cpu->R[rn]; + else addr = cpu->R[rn]; + + u32 storeval = cpu->R[rd]; + if (rd == 15) storeval += 4; + + bool dataabort; + if constexpr (size == 8) dataabort = !cpu->DataWrite8 (addr, storeval); + if constexpr (size == 16) dataabort = !cpu->DataWrite16(addr, storeval); + if constexpr (size == 32) dataabort = !cpu->DataWrite32(addr, storeval); + + cpu->AddCycles_CD(); + if (dataabort) return; + + if constexpr (writeback != Writeback::None) cpu->R[rn] += offset; +} #define A_STR \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ - storeval += 4; \ - bool dataabort = !cpu->DataWrite32(offset, storeval); \ - cpu->AddCycles_CD(); \ - if (dataabort) return; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); // TODO: user mode (bit21) #define A_STR_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 0xF) \ - storeval += 4; \ - bool dataabort = !cpu->DataWrite32(addr, storeval); \ - cpu->AddCycles_CD(); \ - if (dataabort) return; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + StoreSingle<32, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_STRB \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ - bool dataabort = !cpu->DataWrite8(offset, storeval); \ - cpu->AddCycles_CD(); \ - if (dataabort) return; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); // TODO: user mode (bit21) #define A_STRB_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ - bool dataabort = !cpu->DataWrite8(addr, storeval); \ - cpu->AddCycles_CD(); \ - if (dataabort) return; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + StoreSingle<8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDR \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead32(offset, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - val = ROR(val, ((offset&0x3)<<3)); \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - { \ - if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1; \ - cpu->JumpTo(val); \ - } \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - } + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); // TODO: user mode #define A_LDR_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - val = ROR(val, ((addr&0x3)<<3)); \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) \ - { \ - if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1; \ - cpu->JumpTo(val); \ - } \ - else \ - { \ - cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - } + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRB \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); // TODO: user mode #define A_LDRB_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); @@ -236,22 +235,11 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) #define A_STRH \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ - bool dataabort = !cpu->DataWrite16(offset, storeval); \ - cpu->AddCycles_CD(); \ - if (dataabort) return; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + if (cpu->CurInstr & (1<<21)) StoreSingle<16, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<16, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_STRH_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \ - bool dataabort = !cpu->DataWrite16(addr, storeval); \ - cpu->AddCycles_CD(); \ - if (dataabort) return; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + StoreSingle<16, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); // TODO: CHECK LDRD/STRD TIMINGS!! @@ -304,62 +292,25 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRH_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRSB \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRSB_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - val = (s32)(s8)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRSH \ - offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRSH_POST \ - u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ - u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ - cpu->AddCycles_CDI(); \ - if (dataabort) return; \ - val = (s32)(s16)val; \ - if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ - else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ - cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; + LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_IMPLEMENT_HD_LDRSTR(x) \ @@ -651,149 +602,86 @@ void T_LDR_PCREL(ARM* cpu) void T_STR_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CD(); + StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_STRB_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CD(); + StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDR_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - - u32 val; - if (cpu->DataRead32(addr, &val)) - cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3)); - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDRB_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_STRH_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CD(); + StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDRSB_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - if (cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7])) - cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7]; - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDRH_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]); - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDRSH_REG(ARM* cpu) { - u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7]; - if (cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7])) - cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7]; - - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_STR_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD(); + StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); } void T_LDR_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 4) & 0x7C; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - u32 val; - if (cpu->DataRead32(offset, &val)) - cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3)); - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); } void T_STRB_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD(); + StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F)); } void T_LDRB_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 6) & 0x1F; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F)); } void T_STRH_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CD(); + StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E)); } void T_LDRH_IMM(ARM* cpu) { - u32 offset = (cpu->CurInstr >> 5) & 0x3E; - offset += cpu->R[(cpu->CurInstr >> 3) & 0x7]; - - cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7]); - cpu->AddCycles_CDI(); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E)); } void T_STR_SPREL(ARM* cpu) { - u32 offset = (cpu->CurInstr << 2) & 0x3FC; - offset += cpu->R[13]; - - cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7]); - cpu->AddCycles_CD(); + StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC)); } void T_LDR_SPREL(ARM* cpu) { - u32 offset = (cpu->CurInstr << 2) & 0x3FC; - offset += cpu->R[13]; - - cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); - cpu->AddCycles_CDI(); + LoadSingle(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC)); } From f692e7391af134e55e998e3e7e5e65c764f07927 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 26 Aug 2024 16:31:20 -0400 Subject: [PATCH 073/115] the docs lied to me (again) --- src/ARMInterpreter.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 6e6c9a8d..15ec42db 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -121,8 +121,7 @@ void A_MSR_IMM(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - if ((cpu->Num != 1) && (cpu->CurInstr & (0x7<<16))) cpu->AddCycles_CI(2); - else cpu->AddCycles_C(); + cpu->AddCycles_C(); } void A_MSR_REG(ARM* cpu) @@ -175,8 +174,7 @@ void A_MSR_REG(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - if ((cpu->Num != 1) && (cpu->CurInstr & (0x7<<16))) cpu->AddCycles_CI(2); - else cpu->AddCycles_C(); + cpu->AddCycles_C(); } void A_MRS(ARM* cpu) From a9aad74539392e55443b012e868ffbcc83585af8 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 26 Aug 2024 20:43:27 -0400 Subject: [PATCH 074/115] implement user mode load/stores --- src/ARMInterpreter_LoadStore.cpp | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 734b57d0..c9128666 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -63,6 +63,7 @@ enum class Writeback None = 0, Pre, Post, + Trans, }; template @@ -71,15 +72,27 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); u32 addr; - if constexpr (writeback != Writeback::Post) addr = offset + cpu->R[rn]; + if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn]; else addr = cpu->R[rn]; + if constexpr (writeback == Writeback::Trans) + { + if (cpu->Num == 0) + ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap; + } + u32 val; bool dataabort; if constexpr (size == 8) dataabort = !cpu->DataRead8 (addr, &val); if constexpr (size == 16) dataabort = !cpu->DataRead16(addr, &val); if constexpr (size == 32) dataabort = !cpu->DataRead32(addr, &val); + if constexpr (writeback == Writeback::Trans) + { + if (cpu->Num == 0 && (cpu->CPSR & 0x1F) != 0x10) + ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_PrivMap; + } + cpu->AddCycles_CDI(); if (dataabort) return; @@ -125,33 +138,33 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ else StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); -// TODO: user mode (bit21) #define A_STR_POST \ - StoreSingle<32, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<32, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_STRB \ if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ else StoreSingle<8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); -// TODO: user mode (bit21) #define A_STRB_POST \ - StoreSingle<8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else StoreSingle<8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDR \ if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); -// TODO: user mode #define A_LDR_POST \ - LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRB \ if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); -// TODO: user mode #define A_LDRB_POST \ - LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); From be290da23ccf76a6f76d32347558c52c4bf67b6a Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 27 Aug 2024 17:23:18 -0400 Subject: [PATCH 075/115] de-duplicate swp(b) --- src/ARMInterpreter_LoadStore.cpp | 43 +++++++++++++++----------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index c9128666..8deeaa4f 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -360,48 +360,45 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH) -void A_SWP(ARM* cpu) +template +inline void SWP(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 rm = cpu->R[cpu->CurInstr & 0xF]; if ((cpu->CurInstr & 0xF) == 15) rm += 4; u32 val; - if (cpu->DataRead32(base, &val)) + if ((byte ? cpu->DataRead8 (base, &val) + : cpu->DataRead32(base, &val))) { u32 numD = cpu->DataCycles; - if (cpu->DataWrite32(base, rm)) + + if ((byte ? cpu->DataWrite8 (base, rm) + : cpu->DataWrite32(base, rm))) { // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; - if (rd != 15) cpu->R[rd] = ROR(val, 8*(base&0x3)); - else if (cpu->Num==1) cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1); // for some reason these jumps don't work on the arm 9? + + if constexpr (!byte) val = ROR(val, 8*(base&0x3)); + + if (rd != 15) cpu->R[rd] = val; + else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't seem to work on the arm 9? } + cpu->DataCycles += numD; } + cpu->AddCycles_CDI(); } +void A_SWP(ARM* cpu) +{ + void SWP(ARM* cpu); +} + void A_SWPB(ARM* cpu) { - u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; - u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF; - if ((cpu->CurInstr & 0xF) == 15) rm += 4; - - u32 val; - if (cpu->DataRead8(base, &val)) - { - u32 numD = cpu->DataCycles; - if (cpu->DataWrite8(base, rm)) - { - // rd only gets updated if both read and write succeed - u32 rd = (cpu->CurInstr >> 12) & 0xF; - if (rd != 15) cpu->R[rd] = val; - else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't work on the arm 9? - } - cpu->DataCycles += numD; - } - cpu->AddCycles_CDI(); + void SWP(ARM* cpu); } From 685c4828a253e53ab52e0d10abff264055a429b4 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:45:46 -0400 Subject: [PATCH 076/115] try not forgetting about stores lol --- src/ARMInterpreter_LoadStore.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 8deeaa4f..a8f8cb1b 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -116,17 +116,29 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); u32 addr; - if constexpr (writeback != Writeback::Post) addr = offset + cpu->R[rn]; + if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn]; else addr = cpu->R[rn]; u32 storeval = cpu->R[rd]; if (rd == 15) storeval += 4; + + if constexpr (writeback == Writeback::Trans) + { + if (cpu->Num == 0) + ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap; + } bool dataabort; if constexpr (size == 8) dataabort = !cpu->DataWrite8 (addr, storeval); if constexpr (size == 16) dataabort = !cpu->DataWrite16(addr, storeval); if constexpr (size == 32) dataabort = !cpu->DataWrite32(addr, storeval); + if constexpr (writeback == Writeback::Trans) + { + if (cpu->Num == 0 && (cpu->CPSR & 0x1F) != 0x10) + ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_PrivMap; + } + cpu->AddCycles_CD(); if (dataabort) return; From 00038217382c2666d598f1b0582698a4bb186a4f Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Wed, 28 Aug 2024 22:04:22 -0400 Subject: [PATCH 077/115] apparently i never tested this --- src/ARMInterpreter_LoadStore.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index a8f8cb1b..659deaef 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -405,12 +405,12 @@ inline void SWP(ARM* cpu) void A_SWP(ARM* cpu) { - void SWP(ARM* cpu); + SWP(cpu); } void A_SWPB(ARM* cpu) { - void SWP(ARM* cpu); + SWP(cpu); } From c5ac682f04814d7c92d5e0eb8c85e093578f2f4b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 12 Sep 2024 12:41:49 -0400 Subject: [PATCH 078/115] improve data abort handling further --- src/ARM.cpp | 2 +- src/ARM.h | 6 +- src/ARMInterpreter_LoadStore.cpp | 228 ++++++++++++++++++------------- src/CP15.cpp | 34 +++-- 4 files changed, 162 insertions(+), 108 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 7cfa3589..a7c6c11e 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1217,7 +1217,7 @@ bool ARMv4::DataWrite32(u32 addr, u32 val) return true; } -bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort) +bool ARMv4::DataWrite32S(u32 addr, u32 val) { addr &= ~3; diff --git a/src/ARM.h b/src/ARM.h index 2603e646..26080b51 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -142,7 +142,7 @@ public: virtual bool DataWrite8(u32 addr, u8 val) = 0; virtual bool DataWrite16(u32 addr, u16 val) = 0; virtual bool DataWrite32(u32 addr, u32 val) = 0; - virtual bool DataWrite32S(u32 addr, u32 val, bool dataabort = false) = 0; + virtual bool DataWrite32S(u32 addr, u32 val) = 0; virtual void AddCycles_C() = 0; virtual void AddCycles_CI(s32 numI) = 0; @@ -262,7 +262,7 @@ public: bool DataWrite8(u32 addr, u8 val) override; bool DataWrite16(u32 addr, u16 val) override; bool DataWrite32(u32 addr, u32 val) override; - bool DataWrite32S(u32 addr, u32 val, bool dataabort = false) override; + bool DataWrite32S(u32 addr, u32 val) override; void AddCycles_C() override { @@ -410,7 +410,7 @@ public: bool DataWrite8(u32 addr, u8 val) override; bool DataWrite16(u32 addr, u16 val) override; bool DataWrite32(u32 addr, u32 val) override; - bool DataWrite32S(u32 addr, u32 val, bool dataabort = false) override; + bool DataWrite32S(u32 addr, u32 val) override; void AddCycles_C() override; void AddCycles_CI(s32 num) override; void AddCycles_CDI() override; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 659deaef..bbbe08fd 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -82,10 +82,10 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) } u32 val; - bool dataabort; - if constexpr (size == 8) dataabort = !cpu->DataRead8 (addr, &val); - if constexpr (size == 16) dataabort = !cpu->DataRead16(addr, &val); - if constexpr (size == 32) dataabort = !cpu->DataRead32(addr, &val); + bool dabort; + if constexpr (size == 8) dabort = !cpu->DataRead8 (addr, &val); + if constexpr (size == 16) dabort = !cpu->DataRead16(addr, &val); + if constexpr (size == 32) dabort = !cpu->DataRead32(addr, &val); if constexpr (writeback == Writeback::Trans) { @@ -94,8 +94,11 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) } cpu->AddCycles_CDI(); - if (dataabort) return; - + if (dabort) + { + ((ARMv5*)cpu)->DataAbort(); + return; + } if constexpr (size == 8 && signror) val = (s32)(s8)val; if constexpr (size == 16 && signror) val = (s32)(s16)val; if constexpr (size == 32 && signror) val = ROR(val, ((addr&0x3)<<3)); @@ -128,10 +131,10 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap; } - bool dataabort; - if constexpr (size == 8) dataabort = !cpu->DataWrite8 (addr, storeval); - if constexpr (size == 16) dataabort = !cpu->DataWrite16(addr, storeval); - if constexpr (size == 32) dataabort = !cpu->DataWrite32(addr, storeval); + bool dabort; + if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval); + if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval); + if constexpr (size == 32) dabort = !cpu->DataWrite32(addr, storeval); if constexpr (writeback == Writeback::Trans) { @@ -140,7 +143,11 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) } cpu->AddCycles_CD(); - if (dataabort) return; + if (dabort) + { + ((ARMv5*)cpu)->DataAbort(); + return; + } if constexpr (writeback != Writeback::None) cpu->R[rn] += offset; } @@ -273,8 +280,12 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ - u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ + bool dabort = !cpu->DataRead32(offset, &cpu->R[r]); \ + u32 val; dabort |= !cpu->DataRead32S(offset+4, &val); \ + if (dabort) { \ + cpu->AddCycles_CDI(); \ + ((ARMv5*)cpu)->DataAbort(); \ + return; } \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ @@ -285,8 +296,12 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \ - u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ + bool dabort = !cpu->DataRead32(addr, &cpu->R[r]); \ + u32 val; dabort |= !cpu->DataRead32S(addr+4, &val); \ + if (dabort) { \ + cpu->AddCycles_CDI(); \ + ((ARMv5*)cpu)->DataAbort(); \ + return; } \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ else cpu->R[r+1] = val; \ cpu->AddCycles_CDI(); \ @@ -297,11 +312,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ + bool dabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ - dataabort |= !cpu->DataWrite32S (offset+4, storeval, dataabort); /* no, i dont understand it either */ \ + dabort |= !cpu->DataWrite32S (offset+4, storeval); /* no, i dont understand it either */ \ cpu->AddCycles_CD(); \ - if (dataabort) return; \ + if (dabort) { \ + ((ARMv5*)cpu)->DataAbort(); \ + return; } \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; #define A_STRD_POST \ @@ -309,11 +326,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ - bool dataabort = !cpu->DataWrite32(addr, cpu->R[r]); \ + bool dabort = !cpu->DataWrite32(addr, cpu->R[r]); \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ - dataabort |= !cpu->DataWrite32S (addr+4, storeval, dataabort); \ + dabort |= !cpu->DataWrite32S (addr+4, storeval); \ cpu->AddCycles_CD(); \ - if (dataabort) return; \ + if (dabort) { \ + ((ARMv5*)cpu)->DataAbort(); \ + return; } \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; #define A_LDRH \ @@ -396,9 +415,11 @@ inline void SWP(ARM* cpu) if (rd != 15) cpu->R[rd] = val; else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't seem to work on the arm 9? } + else ((ARMv5*)cpu)->DataAbort(); cpu->DataCycles += numD; } + else ((ARMv5*)cpu)->DataAbort(); cpu->AddCycles_CDI(); } @@ -423,6 +444,7 @@ void A_LDM(ARM* cpu) u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; + bool dabort = false; if (!(cpu->CurInstr & (1<<23))) // decrement { @@ -451,11 +473,12 @@ void A_LDM(ARM* cpu) if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i]) - : cpu->DataRead32S(base, &cpu->R[i]))) - { - goto dataabort; - } + u32 val; + dabort |= !(first ? cpu->DataRead32 (base, &val) + : cpu->DataRead32S(base, &val)); + + // remaining loads still occur but are not written to a reg after a data abort is raised + if (!dabort) cpu->R[i] = val; first = false; if (!preinc) base += 4; @@ -466,11 +489,8 @@ void A_LDM(ARM* cpu) if ((cpu->CurInstr & (1<<15))) { if (preinc) base += 4; - if (!(first ? cpu->DataRead32 (base, &pc) - : cpu->DataRead32S(base, &pc))) - { - goto dataabort; - } + dabort |= !(first ? cpu->DataRead32 (base, &pc) + : cpu->DataRead32S(base, &pc)); if (!preinc) base += 4; @@ -482,6 +502,14 @@ void A_LDM(ARM* cpu) if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + // handle data aborts + if (dabort) + { + cpu->AddCycles_CDI(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + // writeback to base if (cpu->CurInstr & (1<<21)) { @@ -506,19 +534,6 @@ void A_LDM(ARM* cpu) if (cpu->CurInstr & (1<<15)) cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); - // jump here if a data abort occurred; writeback is ignored, and any jumps were aborted - if (false) - { - dataabort: - - // switch back to original set of regs - if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) - cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); - - // restore original value of base in case the reg got written to - cpu->R[baseid] = oldbase; - } - cpu->AddCycles_CDI(); } @@ -529,6 +544,7 @@ void A_STM(ARM* cpu) u32 oldbase = base; u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; + bool dabort = false; if (!(cpu->CurInstr & (1<<23))) { @@ -573,11 +589,8 @@ void A_STM(ARM* cpu) if (i == 15) val+=4; - if (!(first ? cpu->DataWrite32 (base, val) - : cpu->DataWrite32S(base, val))) - { - goto dataabort; - } + dabort |= !(first ? cpu->DataWrite32 (base, val) + : cpu->DataWrite32S(base, val)); first = false; @@ -587,21 +600,20 @@ void A_STM(ARM* cpu) if (cpu->CurInstr & (1<<22)) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + + // handle data aborts + if (dabort) + { + // restore original value of base + cpu->R[baseid] = oldbase; + cpu->AddCycles_CD(); + ((ARMv5*)cpu)->DataAbort(); + return; + } if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) cpu->R[baseid] = base; - // jump here if a data abort occurred - if (false) - { - dataabort: - - if (cpu->CurInstr & (1<<22)) - cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); - - // restore original value of base - cpu->R[baseid] = oldbase; - } cpu->AddCycles_CD(); } @@ -616,9 +628,13 @@ void A_STM(ARM* cpu) void T_LDR_PCREL(ARM* cpu) { u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); - cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); + bool dabort = !cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); + if (dabort) + { + ((ARMv5*)cpu)->DataAbort(); + } } @@ -711,6 +727,7 @@ void T_PUSH(ARM* cpu) { int nregs = 0; bool first = true; + bool dabort = false; for (int i = 0; i < 8; i++) { @@ -729,11 +746,9 @@ void T_PUSH(ARM* cpu) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]) - : cpu->DataWrite32S(base, cpu->R[i]))) - { - goto dataabort; - } + dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i])); + first = false; base += 4; } @@ -741,16 +756,19 @@ void T_PUSH(ARM* cpu) if (cpu->CurInstr & (1<<8)) { - if (!(first ? cpu->DataWrite32 (base, cpu->R[14]) - : cpu->DataWrite32S(base, cpu->R[14]))) - { - goto dataabort; - } + dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[14]) + : cpu->DataWrite32S(base, cpu->R[14])); + } + + if (dabort) + { + cpu->AddCycles_CD(); + ((ARMv5*)cpu)->DataAbort(); + return; } cpu->R[13] = wbbase; - dataabort: cpu->AddCycles_CD(); } @@ -758,16 +776,18 @@ void T_POP(ARM* cpu) { u32 base = cpu->R[13]; bool first = true; + bool dabort = false; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i]) - : cpu->DataRead32S(base, &cpu->R[i]))) - { - goto dataabort; - } + u32 val; + dabort |= !(first ? cpu->DataRead32 (base, &val) + : cpu->DataRead32S(base, &val)); + + if (!dabort) cpu->R[i] = val; + first = false; base += 4; } @@ -776,19 +796,25 @@ void T_POP(ARM* cpu) if (cpu->CurInstr & (1<<8)) { u32 pc; - if (!(first ? cpu->DataRead32 (base, &pc) - : cpu->DataRead32S(base, &pc))) - { - goto dataabort; - } + dabort |= !(first ? cpu->DataRead32 (base, &pc) + : cpu->DataRead32S(base, &pc)); + + if (dabort) goto dataabort; if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1; cpu->JumpTo(pc); base += 4; } + if (dabort) + { + dataabort: + cpu->AddCycles_CDI(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + cpu->R[13] = base; - dataabort: cpu->AddCycles_CDI(); } @@ -796,24 +822,29 @@ void T_STMIA(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; + bool dabort = false; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]) - : cpu->DataWrite32S(base, cpu->R[i]))) - { - goto dataabort; - } + dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i]) + : cpu->DataWrite32S(base, cpu->R[i])); + first = false; base += 4; } } + if (dabort) + { + cpu->AddCycles_CD(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + // TODO: check "Rb included in Rlist" case cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; - dataabort: cpu->AddCycles_CD(); } @@ -821,25 +852,32 @@ void T_LDMIA(ARM* cpu) { u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; + bool dabort = false; for (int i = 0; i < 8; i++) { if (cpu->CurInstr & (1<DataRead32 (base, &cpu->R[i]) - : cpu->DataRead32S(base, &cpu->R[i]))) - { - goto dataabort; - } + u32 val; + dabort |= !(first ? cpu->DataRead32 (base, &val) + : cpu->DataRead32S(base, &val)); + + if (!dabort) cpu->R[i] = val; first = false; base += 4; } } + if (dabort) + { + cpu->AddCycles_CDI(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; - dataabort: cpu->AddCycles_CDI(); } diff --git a/src/CP15.cpp b/src/CP15.cpp index 6fcaff93..5bffb185 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -806,9 +806,11 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) bool ARMv5::DataRead8(u32 addr, u32* val) { + // Data Aborts + // Exception is handled in the actual instruction implementation if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { - DataAbort(); + DataCycles = 1; return false; } @@ -832,9 +834,11 @@ bool ARMv5::DataRead8(u32 addr, u32* val) bool ARMv5::DataRead16(u32 addr, u32* val) { + // Data Aborts + // Exception is handled in the actual instruction implementation if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { - DataAbort(); + DataCycles = 1; return false; } @@ -860,9 +864,11 @@ bool ARMv5::DataRead16(u32 addr, u32* val) bool ARMv5::DataRead32(u32 addr, u32* val) { + // Data Aborts + // Exception is handled in the actual instruction implementation if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { - DataAbort(); + DataCycles = 1; return false; } @@ -888,9 +894,11 @@ bool ARMv5::DataRead32(u32 addr, u32* val) bool ARMv5::DataRead32S(u32 addr, u32* val) { + // Data Aborts + // Exception is handled in the actual instruction implementation if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] { - DataAbort(); + DataCycles += 1; return false; } @@ -916,9 +924,11 @@ bool ARMv5::DataRead32S(u32 addr, u32* val) bool ARMv5::DataWrite8(u32 addr, u8 val) { + // Data Aborts + // Exception is handled in the actual instruction implementation if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { - DataAbort(); + DataCycles = 1; return false; } @@ -943,9 +953,11 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) bool ARMv5::DataWrite16(u32 addr, u16 val) { + // Data Aborts + // Exception is handled in the actual instruction implementation if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { - DataAbort(); + DataCycles = 1; return false; } @@ -972,9 +984,11 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) bool ARMv5::DataWrite32(u32 addr, u32 val) { + // Data Aborts + // Exception is handled in the actual instruction implementation if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { - DataAbort(); + DataCycles = 1; return false; } @@ -999,11 +1013,13 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) return true; } -bool ARMv5::DataWrite32S(u32 addr, u32 val, bool dataabort) +bool ARMv5::DataWrite32S(u32 addr, u32 val) { + // Data Aborts + // Exception is handled in the actual instruction implementation if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]] { - if (!dataabort) DataAbort(); + DataCycles += 1; return false; } From a0d71135a1ff2ddd55b0e26b5b55ef5260fdb448 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 13 Sep 2024 07:33:18 -0400 Subject: [PATCH 079/115] very minor optimization attempt --- src/ARMInterpreter_LoadStore.cpp | 34 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index bbbe08fd..bf187aca 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -94,7 +94,7 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) } cpu->AddCycles_CDI(); - if (dabort) + if (dabort) [[unlikely]] { ((ARMv5*)cpu)->DataAbort(); return; @@ -143,7 +143,7 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) } cpu->AddCycles_CD(); - if (dabort) + if (dabort) [[unlikely]] { ((ARMv5*)cpu)->DataAbort(); return; @@ -316,7 +316,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dabort |= !cpu->DataWrite32S (offset+4, storeval); /* no, i dont understand it either */ \ cpu->AddCycles_CD(); \ - if (dabort) { \ + if (dabort) [[unlikely]] { \ ((ARMv5*)cpu)->DataAbort(); \ return; } \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; @@ -330,7 +330,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ dabort |= !cpu->DataWrite32S (addr+4, storeval); \ cpu->AddCycles_CD(); \ - if (dabort) { \ + if (dabort) [[unlikely]] { \ ((ARMv5*)cpu)->DataAbort(); \ return; } \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; @@ -400,12 +400,12 @@ inline void SWP(ARM* cpu) u32 val; if ((byte ? cpu->DataRead8 (base, &val) - : cpu->DataRead32(base, &val))) + : cpu->DataRead32(base, &val))) [[likely]] { u32 numD = cpu->DataCycles; if ((byte ? cpu->DataWrite8 (base, rm) - : cpu->DataWrite32(base, rm))) + : cpu->DataWrite32(base, rm))) [[likely]] { // rd only gets updated if both read and write succeed u32 rd = (cpu->CurInstr >> 12) & 0xF; @@ -478,7 +478,7 @@ void A_LDM(ARM* cpu) : cpu->DataRead32S(base, &val)); // remaining loads still occur but are not written to a reg after a data abort is raised - if (!dabort) cpu->R[i] = val; + if (!dabort) [[likely]] cpu->R[i] = val; first = false; if (!preinc) base += 4; @@ -503,7 +503,7 @@ void A_LDM(ARM* cpu) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); // handle data aborts - if (dabort) + if (dabort) [[unlikely]] { cpu->AddCycles_CDI(); ((ARMv5*)cpu)->DataAbort(); @@ -602,7 +602,7 @@ void A_STM(ARM* cpu) cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); // handle data aborts - if (dabort) + if (dabort) [[unlikely]] { // restore original value of base cpu->R[baseid] = oldbase; @@ -631,7 +631,7 @@ void T_LDR_PCREL(ARM* cpu) bool dabort = !cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); cpu->AddCycles_CDI(); - if (dabort) + if (dabort) [[unlikely]] { ((ARMv5*)cpu)->DataAbort(); } @@ -760,7 +760,7 @@ void T_PUSH(ARM* cpu) : cpu->DataWrite32S(base, cpu->R[14])); } - if (dabort) + if (dabort) [[unlikely]] { cpu->AddCycles_CD(); ((ARMv5*)cpu)->DataAbort(); @@ -786,7 +786,7 @@ void T_POP(ARM* cpu) dabort |= !(first ? cpu->DataRead32 (base, &val) : cpu->DataRead32S(base, &val)); - if (!dabort) cpu->R[i] = val; + if (!dabort) [[likely]] cpu->R[i] = val; first = false; base += 4; @@ -799,13 +799,13 @@ void T_POP(ARM* cpu) dabort |= !(first ? cpu->DataRead32 (base, &pc) : cpu->DataRead32S(base, &pc)); - if (dabort) goto dataabort; + if (dabort) [[unlikely]] goto dataabort; if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1; cpu->JumpTo(pc); base += 4; } - if (dabort) + if (dabort) [[unlikely]] { dataabort: cpu->AddCycles_CDI(); @@ -836,7 +836,7 @@ void T_STMIA(ARM* cpu) } } - if (dabort) + if (dabort) [[unlikely]] { cpu->AddCycles_CD(); ((ARMv5*)cpu)->DataAbort(); @@ -862,13 +862,13 @@ void T_LDMIA(ARM* cpu) dabort |= !(first ? cpu->DataRead32 (base, &val) : cpu->DataRead32S(base, &val)); - if (!dabort) cpu->R[i] = val; + if (!dabort) [[likely]] cpu->R[i] = val; first = false; base += 4; } } - if (dabort) + if (dabort) [[unlikely]] { cpu->AddCycles_CDI(); ((ARMv5*)cpu)->DataAbort(); From 3b9a9e4eb3d8de840ceab9e0ff57c8dc1092d6a5 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 16 Sep 2024 10:23:15 -0400 Subject: [PATCH 080/115] multiply instructions can't write to r15 --- src/ARMInterpreter_ALU.cpp | 62 ++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 37c79904..350ed168 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -766,7 +766,9 @@ void A_MUL(ARM* cpu) u32 res = rm * rs; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) // check arm7 + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + if (cpu->CurInstr & (1<<20)) { cpu->SetNZ(res & 0x80000000, @@ -795,8 +797,10 @@ void A_MLA(ARM* cpu) u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF]; u32 res = (rm * rs) + rn; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) // check arm7 + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (cpu->CurInstr & (1<<20)) { cpu->SetNZ(res & 0x80000000, @@ -825,8 +829,11 @@ void A_UMULL(ARM* cpu) u64 res = (u64)rm * (u64)rs; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (cpu->CurInstr & (1<<20)) { cpu->SetNZ((u32)(res >> 63ULL), @@ -857,9 +864,12 @@ void A_UMLAL(ARM* cpu) u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); res += rd; + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); if (cpu->CurInstr & (1<<20)) { cpu->SetNZ((u32)(res >> 63ULL), @@ -887,9 +897,12 @@ void A_SMULL(ARM* cpu) u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF]; s64 res = (s64)(s32)rm * (s64)(s32)rs; + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); if (cpu->CurInstr & (1<<20)) { cpu->SetNZ((u32)(res >> 63ULL), @@ -920,9 +933,12 @@ void A_SMLAL(ARM* cpu) s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); res += rd; + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); if (cpu->CurInstr & (1<<20)) { cpu->SetNZ((u32)(res >> 63ULL), @@ -959,8 +975,10 @@ void A_SMLAxy(ARM* cpu) u32 res_mul = ((s16)rm * (s16)rs); u32 res = res_mul + rn; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; @@ -980,8 +998,9 @@ void A_SMLAWy(ARM* cpu) u32 res_mul = ((s64)(s32)rm * (s16)rs) >> 16; u32 res = res_mul + rn; - - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (OverflowAdd(res_mul, rn)) cpu->CPSR |= 0x08000000; @@ -1001,8 +1020,9 @@ void A_SMULxy(ARM* cpu) else rs &= 0xFFFF; u32 res = ((s16)rm * (s16)rs); - - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? } @@ -1017,8 +1037,9 @@ void A_SMULWy(ARM* cpu) else rs &= 0xFFFF; u32 res = ((s64)(s32)rm * (s16)rs) >> 16; - - cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? } @@ -1039,8 +1060,11 @@ void A_SMLALxy(ARM* cpu) s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); res += rd; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; - cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res; + + if (((cpu->CurInstr >> 16) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); cpu->AddCycles_CI(1); // TODO: interlock?? } From ac8c942565f956402f681c9cc8fa8b6eb6e0e74b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:10:13 -0400 Subject: [PATCH 081/115] sat add/sub also fail to jump --- src/ARMInterpreter_ALU.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 350ed168..54c1d6d3 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -1110,7 +1110,9 @@ void A_QADD(ARM* cpu) cpu->CPSR |= 0x08000000; } - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; + cpu->AddCycles_C(); // TODO: interlock?? } @@ -1127,8 +1129,10 @@ void A_QSUB(ARM* cpu) res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000; cpu->CPSR |= 0x08000000; } + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? } @@ -1153,8 +1157,10 @@ void A_QDADD(ARM* cpu) res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000; cpu->CPSR |= 0x08000000; } + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? } @@ -1179,8 +1185,10 @@ void A_QDSUB(ARM* cpu) res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000; cpu->CPSR |= 0x08000000; } + + if (((cpu->CurInstr >> 12) & 0xF) != 15) + cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->AddCycles_C(); // TODO: interlock?? } From e2f3dd1e6f1ae6602c5dc63f65ffb4908203ad7f Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:27:36 -0400 Subject: [PATCH 082/115] clarify --- src/ARMInterpreter_ALU.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 54c1d6d3..46c703cd 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -766,7 +766,8 @@ void A_MUL(ARM* cpu) u32 res = rm * rs; - if (((cpu->CurInstr >> 16) & 0xF) != 15) // check arm7 + // all multiply instructions fail writes to r15 on arm7/9 + if (((cpu->CurInstr >> 16) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (cpu->CurInstr & (1<<20)) @@ -798,7 +799,7 @@ void A_MLA(ARM* cpu) u32 res = (rm * rs) + rn; - if (((cpu->CurInstr >> 16) & 0xF) != 15) // check arm7 + if (((cpu->CurInstr >> 16) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; if (cpu->CurInstr & (1<<20)) @@ -1110,6 +1111,7 @@ void A_QADD(ARM* cpu) cpu->CPSR |= 0x08000000; } + // all saturated math instructions fail writes to r15 if (((cpu->CurInstr >> 12) & 0xF) != 15) cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; From e5654ec541528f606b27c854bb4d2ae981ab79d2 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:50:09 -0400 Subject: [PATCH 083/115] r15 mrc mrs --- src/ARMInterpreter.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 15ec42db..8ce15db1 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -201,7 +201,12 @@ void A_MRS(ARM* cpu) else psr = cpu->CPSR; - cpu->R[(cpu->CurInstr>>12) & 0xF] = psr; + if (((cpu->CurInstr>>12) & 0xF) == 15) + { + if (cpu->Num == 1) // doesn't seem to jump on the arm9? checkme + cpu->JumpTo(psr & ~0x1); // checkme: this shouldn't be able to switch to thumb? + } + else cpu->R[(cpu->CurInstr>>12) & 0xF] = psr; if (cpu->Num != 1) cpu->AddCycles_CI(1); // arm9 else cpu->AddCycles_C(); // arm7 @@ -248,12 +253,13 @@ void A_MRC(ARM* cpu) u32 cn = (cpu->CurInstr >> 16) & 0xF; u32 cm = cpu->CurInstr & 0xF; u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; + u32 rd = (cpu->CurInstr>>12) & 0xF; - if (cpu->Num==0 && cp==15) + if (cpu->Num==0 && cp==15 && rd!=15) { - cpu->R[(cpu->CurInstr>>12)&0xF] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo); + cpu->R[rd] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo); } - else if (cpu->Num==1 && cp==14) + else if (cpu->Num==1 && cp==14 && rd!=15) { Log(LogLevel::Debug, "MRC p14,%d,%d,%d on ARM7\n", cn, cm, cpinfo); } From 89e8549a556c0172feece95ce35dfeb61b01f2c1 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 16 Sep 2024 21:27:31 -0400 Subject: [PATCH 084/115] implement comparison instrs w/ rd == 15 --- src/ARMInterpreter_ALU.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 46c703cd..f04ab9b5 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -583,6 +583,11 @@ A_IMPLEMENT_ALU_OP(RSC,) u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ + { \ + if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: WHO IS USING TST w/ rd == 15???"); \ + } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(TST,_S) @@ -593,6 +598,11 @@ A_IMPLEMENT_ALU_TEST(TST,_S) u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ + { \ + if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: WHO IS USING TEQ w/ rd == 15???"); \ + } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(TEQ,_S) @@ -605,6 +615,11 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) !res, \ CarrySub(a, b), \ OverflowSub(a, b)); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ + { \ + if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: WHO IS USING CMP w/ rd == 15???"); \ + } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(CMP,) @@ -617,6 +632,11 @@ A_IMPLEMENT_ALU_TEST(CMP,) !res, \ CarryAdd(a, b), \ OverflowAdd(a, b)); \ + if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ + { \ + if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: WHO IS USING CMN w/ rd == 15???"); \ + } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); A_IMPLEMENT_ALU_TEST(CMN,) @@ -1569,6 +1589,11 @@ void T_CMP_HIREG(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); + if (rd == 15) \ + { \ + if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP HIREG w/ rd == 15."); \ + } \ cpu->AddCycles_C(); } From 6ebabde39217e948406f4aff123f8a4afdf7f30b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Wed, 18 Sep 2024 19:23:23 -0400 Subject: [PATCH 085/115] implement changing thumb bit. and bkpt ig probably wrong --- src/ARM.cpp | 33 ++++++++++++++++++++++++++++++--- src/ARM.h | 6 +++++- src/ARMInterpreter.cpp | 20 ++++++++++++++------ src/ARMInterpreter.h | 1 + src/ARM_InstrInfo.cpp | 1 + src/ARM_InstrTable.h | 2 +- 6 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index a7c6c11e..c7fea92d 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -201,6 +201,13 @@ void ARMv5::Reset() ARM::Reset(); } +void ARMv4::Reset() +{ + Thumb = false; + + ARM::Reset(); +} + void ARM::DoSavestate(Savestate* file) { @@ -395,6 +402,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) Cycles += NDS.ARM7MemTimings[CodeCycles][0] + NDS.ARM7MemTimings[CodeCycles][1]; CPSR |= 0x20; + Thumb = true; } else { @@ -408,6 +416,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) Cycles += NDS.ARM7MemTimings[CodeCycles][2] + NDS.ARM7MemTimings[CodeCycles][3]; CPSR &= ~0x20; + Thumb = false; } } @@ -724,7 +733,12 @@ void ARMv5::Execute() ARMInterpreter::A_BLX_IMM(this); } else - AddCycles_C(); + { + if ((((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0)) == 0x127) + ARMInterpreter::A_BKPT(this); // always passes regardless of condition code + else + AddCycles_C(); + } } // TODO optimize this shit!!! @@ -826,8 +840,11 @@ void ARMv4::Execute() else #endif { - if (CPSR & 0x20) // THUMB + if (Thumb) // THUMB { + Thumb = (CPSR & 0x20); + bool fix = !Thumb; + if constexpr (mode == CPUExecuteMode::InterpreterGDB) GdbCheckC(); @@ -841,12 +858,22 @@ void ARMv4::Execute() else { // actually execute - u32 icode = (CurInstr >> 6); + u32 icode = (CurInstr >> 6) & 0x3FF; ARMInterpreter::THUMBInstrTable[icode](this); } + + if (fix) [[unlikely]] + { + // probably wrong? + // fixup + R[15] &= ~0x3; + NextInstr[1] = CodeRead32(R[15]); + } } else { + Thumb = (CPSR & 0x20); + if constexpr (mode == CPUExecuteMode::InterpreterGDB) GdbCheckC(); diff --git a/src/ARM.h b/src/ARM.h index 26080b51..8d640a30 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -385,6 +385,8 @@ class ARMv4 : public ARM public: ARMv4(melonDS::NDS& nds, std::optional gdb, bool jit); + void Reset() override; + void FillPipeline() override; void JumpTo(u32 addr, bool restorecpsr = false) override; @@ -393,7 +395,7 @@ public: template void Execute(); - u16 CodeRead16(u32 addr) + u32 CodeRead16(u32 addr) { return BusRead16(addr); } @@ -403,6 +405,8 @@ public: return BusRead32(addr); } + bool Thumb; + bool DataRead8(u32 addr, u32* val) override; bool DataRead16(u32 addr, u32* val) override; bool DataRead32(u32 addr, u32* val) override; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 8ce15db1..979e3bb8 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -69,6 +69,14 @@ void T_UNK(ARM* cpu) cpu->JumpTo(cpu->ExceptionBase + 0x04); } +void A_BKPT(ARM* cpu) +{ + if (cpu->Num == 1) A_UNK(cpu); // checkme + + Log(LogLevel::Warn, "BKPT: "); // combine with the prefetch abort warning message + ((ARMv5*)cpu)->PrefetchAbort(); +} + void A_MSR_IMM(ARM* cpu) @@ -105,9 +113,6 @@ void A_MSR_IMM(ARM* cpu) //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9 if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000); - if (!(cpu->CurInstr & (1<<22))) - mask &= 0xFFFFFFDF; - if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00; u32 val = ROR((cpu->CurInstr & 0xFF), ((cpu->CurInstr >> 7) & 0x1E)); @@ -121,6 +126,9 @@ void A_MSR_IMM(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); + if (cpu->Num == 0) + if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this + cpu->AddCycles_C(); } @@ -158,9 +166,6 @@ void A_MSR_REG(ARM* cpu) //if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9 if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000); - if (!(cpu->CurInstr & (1<<22))) - mask &= 0xFFFFFFDF; - if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00; u32 val = cpu->R[cpu->CurInstr & 0xF]; @@ -173,6 +178,9 @@ void A_MSR_REG(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); + + if (cpu->Num == 0) + if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this cpu->AddCycles_C(); } diff --git a/src/ARMInterpreter.h b/src/ARMInterpreter.h index 1066ac69..4c5ddafe 100644 --- a/src/ARMInterpreter.h +++ b/src/ARMInterpreter.h @@ -36,6 +36,7 @@ void A_MRS(ARM* cpu); void A_MCR(ARM* cpu); void A_MRC(ARM* cpu); void A_SVC(ARM* cpu); +void A_BKPT(ARM* cpu); void T_SVC(ARM* cpu); diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp index 58838307..d1be9761 100644 --- a/src/ARM_InstrInfo.cpp +++ b/src/ARM_InstrInfo.cpp @@ -194,6 +194,7 @@ const u32 A_BX = A_BranchAlways | A_Read0 | ak(ak_BX); const u32 A_BLX_REG = A_BranchAlways | A_Link | A_Read0 | ak(ak_BLX_REG); const u32 A_UNK = A_BranchAlways | A_Link | ak(ak_UNK); +const u32 A_BKPT = A_BranchAlways | A_Link | ak(ak_UNK); const u32 A_MSR_IMM = ak(ak_MSR_IMM); const u32 A_MSR_REG = A_Read0 | ak(ak_MSR_REG); const u32 A_MRS = A_Write12 | ak(ak_MRS); diff --git a/src/ARM_InstrTable.h b/src/ARM_InstrTable.h index 8213c2e0..2c480f8d 100644 --- a/src/ARM_InstrTable.h +++ b/src/ARM_InstrTable.h @@ -130,7 +130,7 @@ INSTRFUNC_PROTO(ARMInstrTable[4096]) = // 0001 0010 0000 A_MSR_REG, A_BX, A_UNK, A_BLX_REG, - A_UNK, A_QSUB, A_UNK, A_UNK, + A_UNK, A_QSUB, A_UNK, A_BKPT, A_SMLAWy, A_UNK, A_SMULWy, A_STRH_REG, A_SMLAWy, A_LDRD_REG, A_SMULWy, A_STRD_REG, From 45f87a1c8d529289f031619e3b13d4e6d67c3d57 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 19 Sep 2024 20:57:55 -0400 Subject: [PATCH 086/115] prevent t bit changes without pipeline flush on arm7 idk what's happening fully and its gonna be slow to emulate most likely we'll figure this out later --- src/ARM.cpp | 26 ++------------ src/ARM.h | 6 +--- src/ARMInterpreter.cpp | 24 ++++++++++--- src/ARMInterpreter_ALU.cpp | 71 +++++++++++++++++++++++++++++++------- 4 files changed, 80 insertions(+), 47 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index c7fea92d..6518b751 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -201,13 +201,6 @@ void ARMv5::Reset() ARM::Reset(); } -void ARMv4::Reset() -{ - Thumb = false; - - ARM::Reset(); -} - void ARM::DoSavestate(Savestate* file) { @@ -402,7 +395,6 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) Cycles += NDS.ARM7MemTimings[CodeCycles][0] + NDS.ARM7MemTimings[CodeCycles][1]; CPSR |= 0x20; - Thumb = true; } else { @@ -416,7 +408,6 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) Cycles += NDS.ARM7MemTimings[CodeCycles][2] + NDS.ARM7MemTimings[CodeCycles][3]; CPSR &= ~0x20; - Thumb = false; } } @@ -840,11 +831,8 @@ void ARMv4::Execute() else #endif { - if (Thumb) // THUMB + if (CPSR & 0x20) // THUMB { - Thumb = (CPSR & 0x20); - bool fix = !Thumb; - if constexpr (mode == CPUExecuteMode::InterpreterGDB) GdbCheckC(); @@ -858,22 +846,12 @@ void ARMv4::Execute() else { // actually execute - u32 icode = (CurInstr >> 6) & 0x3FF; + u32 icode = (CurInstr >> 6); ARMInterpreter::THUMBInstrTable[icode](this); } - - if (fix) [[unlikely]] - { - // probably wrong? - // fixup - R[15] &= ~0x3; - NextInstr[1] = CodeRead32(R[15]); - } } else { - Thumb = (CPSR & 0x20); - if constexpr (mode == CPUExecuteMode::InterpreterGDB) GdbCheckC(); diff --git a/src/ARM.h b/src/ARM.h index 8d640a30..26080b51 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -385,8 +385,6 @@ class ARMv4 : public ARM public: ARMv4(melonDS::NDS& nds, std::optional gdb, bool jit); - void Reset() override; - void FillPipeline() override; void JumpTo(u32 addr, bool restorecpsr = false) override; @@ -395,7 +393,7 @@ public: template void Execute(); - u32 CodeRead16(u32 addr) + u16 CodeRead16(u32 addr) { return BusRead16(addr); } @@ -405,8 +403,6 @@ public: return BusRead32(addr); } - bool Thumb; - bool DataRead8(u32 addr, u32* val) override; bool DataRead16(u32 addr, u32* val) override; bool DataRead32(u32 addr, u32* val) override; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 979e3bb8..b11913ef 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -126,8 +126,15 @@ void A_MSR_IMM(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - if (cpu->Num == 0) - if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this + if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) + { + if (cpu->Num == 0) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this + else + { + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR IMM T bit change on ARM7\n"); + cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); // keep it from crashing the emulator at least + } + } cpu->AddCycles_C(); } @@ -178,9 +185,16 @@ void A_MSR_REG(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - - if (cpu->Num == 0) - if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this + + if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) + { + if (cpu->Num == 0) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this + else + { + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); + cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); // keep it from crashing the emulator at least + } + } cpu->AddCycles_C(); } diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index f04ab9b5..fd60b5f0 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -585,8 +585,17 @@ A_IMPLEMENT_ALU_OP(RSC,) !res); \ if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: WHO IS USING TST w/ rd == 15???"); \ + if (cpu->Num == 1) \ + { \ + u32 oldcpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST T bit change on ARM7\n"); \ + cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); /* keep it from crashing the emulator at least */ \ + } \ + } \ + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -600,8 +609,17 @@ A_IMPLEMENT_ALU_TEST(TST,_S) !res); \ if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: WHO IS USING TEQ w/ rd == 15???"); \ + if (cpu->Num == 1) \ + { \ + u32 oldcpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ T bit change on ARM7\n"); \ + cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); /* keep it from crashing the emulator at least */ \ + } \ + } \ + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -617,8 +635,17 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) OverflowSub(a, b)); \ if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: WHO IS USING CMP w/ rd == 15???"); \ + if (cpu->Num == 1) \ + { \ + u32 oldcpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP T bit change on ARM7\n"); \ + cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); /* keep it from crashing the emulator at least */ \ + } \ + } \ + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -634,8 +661,17 @@ A_IMPLEMENT_ALU_TEST(CMP,) OverflowAdd(a, b)); \ if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: WHO IS USING CMN w/ rd == 15???"); \ + if (cpu->Num == 1) \ + { \ + u32 oldcpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN T bit change on ARM7\n"); \ + cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); /* keep it from crashing the emulator at least */ \ + } \ + } \ + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -1589,11 +1625,20 @@ void T_CMP_HIREG(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); - if (rd == 15) \ - { \ - if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP HIREG w/ rd == 15."); \ - } \ + if (rd == 15) + { + if (cpu->Num == 1) + { + u32 oldcpsr = cpu->CPSR; + cpu->RestoreCPSR(); // ARM7TDMI restores cpsr and does ___not___ flush the pipeline. + if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) + { + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); + cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); // keep it from crashing the emulator at least + } + } + else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP HIREG w/ rd == 15 on ARM9\n"); + } cpu->AddCycles_C(); } From c1338147137dab672034c9f3074c1bace7b31e4f Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 20 Sep 2024 04:39:16 -0400 Subject: [PATCH 087/115] some day i will remember to test before pushing --- src/ARMInterpreter_ALU.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index fd60b5f0..a638a49c 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -587,7 +587,7 @@ A_IMPLEMENT_ALU_OP(RSC,) { \ if (cpu->Num == 1) \ { \ - u32 oldcpsr = cpu->CPSR; \ + u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ { \ @@ -611,7 +611,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S) { \ if (cpu->Num == 1) \ { \ - u32 oldcpsr = cpu->CPSR; \ + u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ { \ @@ -637,7 +637,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) { \ if (cpu->Num == 1) \ { \ - u32 oldcpsr = cpu->CPSR; \ + u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ { \ @@ -663,7 +663,7 @@ A_IMPLEMENT_ALU_TEST(CMP,) { \ if (cpu->Num == 1) \ { \ - u32 oldcpsr = cpu->CPSR; \ + u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ { \ @@ -1629,7 +1629,7 @@ void T_CMP_HIREG(ARM* cpu) { if (cpu->Num == 1) { - u32 oldcpsr = cpu->CPSR; + u32 oldpsr = cpu->CPSR; cpu->RestoreCPSR(); // ARM7TDMI restores cpsr and does ___not___ flush the pipeline. if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) { From 7afa805afc3719b145424c642f3858d3be75169a Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 20 Sep 2024 05:37:51 -0400 Subject: [PATCH 088/115] slightly better code --- src/ARMInterpreter.cpp | 10 +++++----- src/ARMInterpreter_ALU.cpp | 30 +++++++++++++++--------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index b11913ef..72d1e189 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -126,13 +126,13 @@ void A_MSR_IMM(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) + if (cpu->CPSR & 0x20) [[unlikely]] { if (cpu->Num == 0) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this else { - Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR IMM T bit change on ARM7\n"); - cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); // keep it from crashing the emulator at least + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); + cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least } } @@ -186,13 +186,13 @@ void A_MSR_REG(ARM* cpu) if (!(cpu->CurInstr & (1<<22))) cpu->UpdateMode(oldpsr, cpu->CPSR); - if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) + if (cpu->CPSR & 0x20) [[unlikely]] { if (cpu->Num == 0) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this else { Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); - cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); // keep it from crashing the emulator at least + cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least } } diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index a638a49c..9305fc42 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -583,16 +583,16 @@ A_IMPLEMENT_ALU_OP(RSC,) u32 res = a & b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ if (cpu->Num == 1) \ { \ u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ + if (cpu->CPSR & 0x20) \ { \ Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST T bit change on ARM7\n"); \ - cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); /* keep it from crashing the emulator at least */ \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ } \ } \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST w/ rd == 15 on ARM9\n"); \ @@ -607,16 +607,16 @@ A_IMPLEMENT_ALU_TEST(TST,_S) u32 res = a ^ b; \ cpu->SetNZ(res & 0x80000000, \ !res); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ if (cpu->Num == 1) \ { \ u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ + if (cpu->CPSR & 0x20) \ { \ Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ T bit change on ARM7\n"); \ - cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); /* keep it from crashing the emulator at least */ \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ } \ } \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ w/ rd == 15 on ARM9\n"); \ @@ -633,16 +633,16 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) !res, \ CarrySub(a, b), \ OverflowSub(a, b)); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ if (cpu->Num == 1) \ { \ u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ + if (cpu->CPSR & 0x20) \ { \ Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP T bit change on ARM7\n"); \ - cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); /* keep it from crashing the emulator at least */ \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ } \ } \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP w/ rd == 15 on ARM9\n"); \ @@ -659,16 +659,16 @@ A_IMPLEMENT_ALU_TEST(CMP,) !res, \ CarryAdd(a, b), \ OverflowAdd(a, b)); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) /* yes this instruction has a secret rd for some reason */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ if (cpu->Num == 1) \ { \ u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) \ + if (cpu->CPSR & 0x20) \ { \ Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN T bit change on ARM7\n"); \ - cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); /* keep it from crashing the emulator at least */ \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ } \ } \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN w/ rd == 15 on ARM9\n"); \ @@ -1625,16 +1625,16 @@ void T_CMP_HIREG(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); - if (rd == 15) + if (rd == 15) [[unlikely]] { if (cpu->Num == 1) { u32 oldpsr = cpu->CPSR; cpu->RestoreCPSR(); // ARM7TDMI restores cpsr and does ___not___ flush the pipeline. - if (!(oldpsr & 0x20) && (cpu->CPSR & 0x20)) + if (!(cpu->CPSR & 0x20)) { Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); - cpu->CPSR = (cpu->CPSR & ~0x20) | (oldpsr & 0x20); // keep it from crashing the emulator at least + cpu->CPSR |= 0x20; // keep it from crashing the emulator at least } } else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP HIREG w/ rd == 15 on ARM9\n"); From 157e9c5b046199658d5c5e12a3a5b29bf944a451 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 20 Sep 2024 13:34:27 -0400 Subject: [PATCH 089/115] reimplement changing t bit with arm7 kinda slow though? --- src/ARM.cpp | 52 +++++++++++++++++++++++++++-------- src/ARM.h | 3 +++ src/ARMInterpreter.cpp | 4 +-- src/ARMInterpreter_ALU.cpp | 55 ++++---------------------------------- 4 files changed, 51 insertions(+), 63 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 6518b751..ade9649f 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -385,6 +385,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) if (addr & 0x1) { + Thumb = true; addr &= ~0x1; R[15] = addr+2; @@ -398,6 +399,7 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) } else { + Thumb = false; addr &= ~0x3; R[15] = addr+4; @@ -831,35 +833,63 @@ void ARMv4::Execute() else #endif { - if (CPSR & 0x20) // THUMB + if (Thumb) // THUMB { + // attempt to delay t bit changes without a pipeline flush (msr) by one instruction + Thumb = CPSR & 0x20; if constexpr (mode == CPUExecuteMode::InterpreterGDB) GdbCheckC(); // prefetch - R[15] += 2; - CurInstr = NextInstr[0]; - NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead16(R[15]); + // thumb bit can change without a flush and is usually delayed 1 instruction + // but if the code fetch takes more than 1 cycle(?) it can take effect early for just the code fetch + if (!Thumb && (NDS.ARM7MemTimings[CodeCycles][2] > 1)) [[unlikely]] // checkme + { + R[15] = (R[15] + 4) & ~0x3; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead32(R[15]); + } + else + { + R[15] += 2; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead16(R[15]); + } if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); else { // actually execute - u32 icode = (CurInstr >> 6); + u32 icode = (CurInstr >> 6) & 0x3FF; ARMInterpreter::THUMBInstrTable[icode](this); } } else { + // attempt to delay t bit changes without a pipeline flush (msr) by one instruction + Thumb = CPSR & 0x20; if constexpr (mode == CPUExecuteMode::InterpreterGDB) GdbCheckC(); - + // prefetch - R[15] += 4; - CurInstr = NextInstr[0]; - NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead32(R[15]); + // thumb bit can change without a flush and is usually delayed 1 instruction + // but if the code fetch takes more than 1 cycle(?) it can take effect early for just the code fetch + if (Thumb && (NDS.ARM7MemTimings[CodeCycles][2] > 1)) [[unlikely]] // checkme? + { + R[15] = (R[15] + 4) & ~0x3; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead16(R[15]); + } + else + { + R[15] = (R[15] + 4) & ~0x3; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead32(R[15]); + } if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); else if (CheckCondition(CurInstr >> 28)) // actually execute diff --git a/src/ARM.h b/src/ARM.h index 26080b51..81d6be39 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -416,6 +416,9 @@ public: void AddCycles_CDI() override; void AddCycles_CD() override; +private: + bool Thumb; + protected: u8 BusRead8(u32 addr) override; u16 BusRead16(u32 addr) override; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 72d1e189..cc19df3b 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -129,11 +129,11 @@ void A_MSR_IMM(ARM* cpu) if (cpu->CPSR & 0x20) [[unlikely]] { if (cpu->Num == 0) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this - else + /*else { Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least - } + }*/ } cpu->AddCycles_C(); diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 9305fc42..abe2bce0 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -585,16 +585,7 @@ A_IMPLEMENT_ALU_OP(RSC,) !res); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) \ - { \ - u32 oldpsr = cpu->CPSR; \ - cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - if (cpu->CPSR & 0x20) \ - { \ - Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST T bit change on ARM7\n"); \ - cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ - } \ - } \ + if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7 restores cpsr and does ___not___ flush the pipeline. */ \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -609,16 +600,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S) !res); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) \ - { \ - u32 oldpsr = cpu->CPSR; \ - cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - if (cpu->CPSR & 0x20) \ - { \ - Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ T bit change on ARM7\n"); \ - cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ - } \ - } \ + if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7 restores cpsr and does ___not___ flush the pipeline. */ \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -635,16 +617,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) OverflowSub(a, b)); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) \ - { \ - u32 oldpsr = cpu->CPSR; \ - cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - if (cpu->CPSR & 0x20) \ - { \ - Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP T bit change on ARM7\n"); \ - cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ - } \ - } \ + if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7 restores cpsr and does ___not___ flush the pipeline. */ \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -661,16 +634,7 @@ A_IMPLEMENT_ALU_TEST(CMP,) OverflowAdd(a, b)); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) \ - { \ - u32 oldpsr = cpu->CPSR; \ - cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ - if (cpu->CPSR & 0x20) \ - { \ - Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN T bit change on ARM7\n"); \ - cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ - } \ - } \ + if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7 restores cpsr and does ___not___ flush the pipeline. */ \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -1627,16 +1591,7 @@ void T_CMP_HIREG(ARM* cpu) OverflowSub(a, b)); if (rd == 15) [[unlikely]] { - if (cpu->Num == 1) - { - u32 oldpsr = cpu->CPSR; - cpu->RestoreCPSR(); // ARM7TDMI restores cpsr and does ___not___ flush the pipeline. - if (!(cpu->CPSR & 0x20)) - { - Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); - cpu->CPSR |= 0x20; // keep it from crashing the emulator at least - } - } + if (cpu->Num == 1) cpu->RestoreCPSR(); // ARM7 restores cpsr and does ___not___ flush the pipeline. else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP HIREG w/ rd == 15 on ARM9\n"); } cpu->AddCycles_C(); From 8d451dff48b3932225d9d2b222ec7bdeedbda265 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 20 Sep 2024 23:47:40 -0400 Subject: [PATCH 090/115] misaligned pc.......... --- src/ARM.cpp | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index ade9649f..509027c5 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -837,6 +837,7 @@ void ARMv4::Execute() { // attempt to delay t bit changes without a pipeline flush (msr) by one instruction Thumb = CPSR & 0x20; + bool fix = !Thumb; if constexpr (mode == CPUExecuteMode::InterpreterGDB) GdbCheckC(); @@ -845,17 +846,17 @@ void ARMv4::Execute() // but if the code fetch takes more than 1 cycle(?) it can take effect early for just the code fetch if (!Thumb && (NDS.ARM7MemTimings[CodeCycles][2] > 1)) [[unlikely]] // checkme { - R[15] = (R[15] + 4) & ~0x3; + R[15] += 4; CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead32(R[15]); + NextInstr[1] = CodeRead32(R[15] & ~3); } - else + else [[likely]] { R[15] += 2; CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead16(R[15]); + NextInstr[1] = CodeRead16(R[15] & ~1); } if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); @@ -865,6 +866,12 @@ void ARMv4::Execute() u32 icode = (CurInstr >> 6) & 0x3FF; ARMInterpreter::THUMBInstrTable[icode](this); } + + if (fix) [[unlikely]] // attempt at fixing flushless t bit changes + { + R[15] += 2; // yes it can end up misaligned. that's correct. + NextInstr[1] = CodeRead32(R[15] & ~3); + } } else { @@ -878,17 +885,17 @@ void ARMv4::Execute() // but if the code fetch takes more than 1 cycle(?) it can take effect early for just the code fetch if (Thumb && (NDS.ARM7MemTimings[CodeCycles][2] > 1)) [[unlikely]] // checkme? { - R[15] = (R[15] + 4) & ~0x3; + R[15] += 4; CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead16(R[15]); + NextInstr[1] = CodeRead16(R[15] & ~1); } - else + else [[likely]] { - R[15] = (R[15] + 4) & ~0x3; + R[15] += 4; CurInstr = NextInstr[0]; NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead32(R[15]); + NextInstr[1] = CodeRead32(R[15] & ~3); } if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); From 7b0d71dbbedcea1ab0a311147a75cdd3909d0995 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 22 Sep 2024 19:57:33 -0400 Subject: [PATCH 091/115] Revert T bit changing support for arm7 i cannot comprehend what is happening currently --- src/ARM.cpp | 59 +++++++------------------------------- src/ARM.h | 3 -- src/ARMInterpreter.cpp | 4 +-- src/ARMInterpreter_ALU.cpp | 55 +++++++++++++++++++++++++++++++---- 4 files changed, 63 insertions(+), 58 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 509027c5..6518b751 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -385,7 +385,6 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) if (addr & 0x1) { - Thumb = true; addr &= ~0x1; R[15] = addr+2; @@ -399,7 +398,6 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) } else { - Thumb = false; addr &= ~0x3; R[15] = addr+4; @@ -833,70 +831,35 @@ void ARMv4::Execute() else #endif { - if (Thumb) // THUMB + if (CPSR & 0x20) // THUMB { - // attempt to delay t bit changes without a pipeline flush (msr) by one instruction - Thumb = CPSR & 0x20; - bool fix = !Thumb; if constexpr (mode == CPUExecuteMode::InterpreterGDB) GdbCheckC(); // prefetch - // thumb bit can change without a flush and is usually delayed 1 instruction - // but if the code fetch takes more than 1 cycle(?) it can take effect early for just the code fetch - if (!Thumb && (NDS.ARM7MemTimings[CodeCycles][2] > 1)) [[unlikely]] // checkme - { - R[15] += 4; - CurInstr = NextInstr[0]; - NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead32(R[15] & ~3); - } - else [[likely]] - { - R[15] += 2; - CurInstr = NextInstr[0]; - NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead16(R[15] & ~1); - } + R[15] += 2; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead16(R[15]); if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); else { // actually execute - u32 icode = (CurInstr >> 6) & 0x3FF; + u32 icode = (CurInstr >> 6); ARMInterpreter::THUMBInstrTable[icode](this); } - - if (fix) [[unlikely]] // attempt at fixing flushless t bit changes - { - R[15] += 2; // yes it can end up misaligned. that's correct. - NextInstr[1] = CodeRead32(R[15] & ~3); - } } else { - // attempt to delay t bit changes without a pipeline flush (msr) by one instruction - Thumb = CPSR & 0x20; if constexpr (mode == CPUExecuteMode::InterpreterGDB) GdbCheckC(); - + // prefetch - // thumb bit can change without a flush and is usually delayed 1 instruction - // but if the code fetch takes more than 1 cycle(?) it can take effect early for just the code fetch - if (Thumb && (NDS.ARM7MemTimings[CodeCycles][2] > 1)) [[unlikely]] // checkme? - { - R[15] += 4; - CurInstr = NextInstr[0]; - NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead16(R[15] & ~1); - } - else [[likely]] - { - R[15] += 4; - CurInstr = NextInstr[0]; - NextInstr[0] = NextInstr[1]; - NextInstr[1] = CodeRead32(R[15] & ~3); - } + R[15] += 4; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + NextInstr[1] = CodeRead32(R[15]); if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); else if (CheckCondition(CurInstr >> 28)) // actually execute diff --git a/src/ARM.h b/src/ARM.h index 81d6be39..26080b51 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -416,9 +416,6 @@ public: void AddCycles_CDI() override; void AddCycles_CD() override; -private: - bool Thumb; - protected: u8 BusRead8(u32 addr) override; u16 BusRead16(u32 addr) override; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index cc19df3b..72d1e189 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -129,11 +129,11 @@ void A_MSR_IMM(ARM* cpu) if (cpu->CPSR & 0x20) [[unlikely]] { if (cpu->Num == 0) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this - /*else + else { Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least - }*/ + } } cpu->AddCycles_C(); diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index abe2bce0..9305fc42 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -585,7 +585,16 @@ A_IMPLEMENT_ALU_OP(RSC,) !res); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7 restores cpsr and does ___not___ flush the pipeline. */ \ + if (cpu->Num == 1) \ + { \ + u32 oldpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + if (cpu->CPSR & 0x20) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST T bit change on ARM7\n"); \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ + } \ + } \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -600,7 +609,16 @@ A_IMPLEMENT_ALU_TEST(TST,_S) !res); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7 restores cpsr and does ___not___ flush the pipeline. */ \ + if (cpu->Num == 1) \ + { \ + u32 oldpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + if (cpu->CPSR & 0x20) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ T bit change on ARM7\n"); \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ + } \ + } \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -617,7 +635,16 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) OverflowSub(a, b)); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7 restores cpsr and does ___not___ flush the pipeline. */ \ + if (cpu->Num == 1) \ + { \ + u32 oldpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + if (cpu->CPSR & 0x20) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP T bit change on ARM7\n"); \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ + } \ + } \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -634,7 +661,16 @@ A_IMPLEMENT_ALU_TEST(CMP,) OverflowAdd(a, b)); \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ { \ - if (cpu->Num == 1) cpu->RestoreCPSR(); /* ARM7 restores cpsr and does ___not___ flush the pipeline. */ \ + if (cpu->Num == 1) \ + { \ + u32 oldpsr = cpu->CPSR; \ + cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ + if (cpu->CPSR & 0x20) \ + { \ + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN T bit change on ARM7\n"); \ + cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ + } \ + } \ else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN w/ rd == 15 on ARM9\n"); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -1591,7 +1627,16 @@ void T_CMP_HIREG(ARM* cpu) OverflowSub(a, b)); if (rd == 15) [[unlikely]] { - if (cpu->Num == 1) cpu->RestoreCPSR(); // ARM7 restores cpsr and does ___not___ flush the pipeline. + if (cpu->Num == 1) + { + u32 oldpsr = cpu->CPSR; + cpu->RestoreCPSR(); // ARM7TDMI restores cpsr and does ___not___ flush the pipeline. + if (!(cpu->CPSR & 0x20)) + { + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); + cpu->CPSR |= 0x20; // keep it from crashing the emulator at least + } + } else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP HIREG w/ rd == 15 on ARM9\n"); } cpu->AddCycles_C(); From 8af790beeec9e1a74648e8cb01a3492efcb6d340 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:00:35 -0400 Subject: [PATCH 092/115] ldm/str with empty rlist --- src/ARMInterpreter_LoadStore.cpp | 88 ++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index bf187aca..f181476b 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -434,7 +434,59 @@ void A_SWPB(ARM* cpu) SWP(cpu); } +void ReglessLDMSTM(ARM* cpu, const bool load, const u8 baseid, const bool writeback, const bool decrement, bool preinc, const bool usermode) +{ + if (cpu->Num == 1) + { + u32 base = cpu->R[baseid]; + if (decrement) + { + preinc = !preinc; + base -= 0x40; + } + if (preinc) base+=4; + + if (load) + { + u32 pc; + if (cpu->DataRead32(base, &pc)) + { + cpu->AddCycles_CDI(); + cpu->JumpTo(pc, usermode); // checkme can we restore cpsr? + } + else + { + cpu->AddCycles_CDI(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + } + else + { + if (!cpu->DataWrite32(base, cpu->R[15])) + { + cpu->AddCycles_CD(); + ((ARMv5*)cpu)->DataAbort(); + return; + } + else + { + cpu->AddCycles_CD(); + } + } + } + else + { + cpu->AddCycles_C(); // checkme + } + + if (writeback) + { + if (decrement) cpu->R[baseid] -= 0x40; + else cpu->R[baseid] += 0x40; + } +} void A_LDM(ARM* cpu) { @@ -445,6 +497,12 @@ void A_LDM(ARM* cpu) u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; bool dabort = false; + + if (!(cpu->CurInstr & 0xFFFF)) [[unlikely]] + { + ReglessLDMSTM(cpu, true, baseid, cpu->CurInstr & (1<<21), !(cpu->CurInstr & (1<<23)), preinc, cpu->CurInstr & (1<<22)); + return; + } if (!(cpu->CurInstr & (1<<23))) // decrement { @@ -545,6 +603,12 @@ void A_STM(ARM* cpu) u32 preinc = (cpu->CurInstr & (1<<24)); bool first = true; bool dabort = false; + + if (!(cpu->CurInstr & 0xFFFF)) [[unlikely]] + { + ReglessLDMSTM(cpu, false, baseid, cpu->CurInstr & (1<<21), !(cpu->CurInstr & (1<<23)), preinc, false); + return; + } if (!(cpu->CurInstr & (1<<23))) { @@ -737,6 +801,12 @@ void T_PUSH(ARM* cpu) if (cpu->CurInstr & (1<<8)) nregs++; + + if (!nregs) [[unlikely]] + { + ReglessLDMSTM(cpu, false, 13, true, true, true, false); + return; + } u32 base = cpu->R[13]; base -= (nregs<<2); @@ -777,6 +847,12 @@ void T_POP(ARM* cpu) u32 base = cpu->R[13]; bool first = true; bool dabort = false; + + if (!(cpu->CurInstr & 0x1FF)) [[unlikely]] + { + ReglessLDMSTM(cpu, true, 13, true, false, false, false); + return; + } for (int i = 0; i < 8; i++) { @@ -823,6 +899,12 @@ void T_STMIA(ARM* cpu) u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; bool dabort = false; + + if (!(cpu->CurInstr & 0xFF)) [[unlikely]] + { + ReglessLDMSTM(cpu, false, (cpu->CurInstr >> 8) & 0x7, true, false, false, false); + return; + } for (int i = 0; i < 8; i++) { @@ -853,6 +935,12 @@ void T_LDMIA(ARM* cpu) u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; bool first = true; bool dabort = false; + + if (!(cpu->CurInstr & 0xFF)) [[unlikely]] + { + ReglessLDMSTM(cpu, true, (cpu->CurInstr >> 8) & 0x7, true, false, false, false); + return; + } for (int i = 0; i < 8; i++) { From 3b73f21bb7b5836bea24d82b5ce8326dee6ac7f9 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 23 Sep 2024 16:12:23 -0400 Subject: [PATCH 093/115] str r15 is incremented by +2/+4 oop --- src/ARMInterpreter_LoadStore.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index f181476b..e8e6accc 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -434,7 +434,7 @@ void A_SWPB(ARM* cpu) SWP(cpu); } -void ReglessLDMSTM(ARM* cpu, const bool load, const u8 baseid, const bool writeback, const bool decrement, bool preinc, const bool usermode) +void ReglessLDMSTM(ARM* cpu, const bool load, const u8 baseid, const bool writeback, const bool decrement, bool preinc, const bool usermode, const bool thumb) { if (cpu->Num == 1) { @@ -464,7 +464,7 @@ void ReglessLDMSTM(ARM* cpu, const bool load, const u8 baseid, const bool writeb } else { - if (!cpu->DataWrite32(base, cpu->R[15])) + if (!cpu->DataWrite32(base, cpu->R[15] + (thumb ? 2 : 4))) { cpu->AddCycles_CD(); ((ARMv5*)cpu)->DataAbort(); @@ -500,7 +500,7 @@ void A_LDM(ARM* cpu) if (!(cpu->CurInstr & 0xFFFF)) [[unlikely]] { - ReglessLDMSTM(cpu, true, baseid, cpu->CurInstr & (1<<21), !(cpu->CurInstr & (1<<23)), preinc, cpu->CurInstr & (1<<22)); + ReglessLDMSTM(cpu, true, baseid, cpu->CurInstr & (1<<21), !(cpu->CurInstr & (1<<23)), preinc, cpu->CurInstr & (1<<22), false); return; } @@ -606,7 +606,7 @@ void A_STM(ARM* cpu) if (!(cpu->CurInstr & 0xFFFF)) [[unlikely]] { - ReglessLDMSTM(cpu, false, baseid, cpu->CurInstr & (1<<21), !(cpu->CurInstr & (1<<23)), preinc, false); + ReglessLDMSTM(cpu, false, baseid, cpu->CurInstr & (1<<21), !(cpu->CurInstr & (1<<23)), preinc, false, false); return; } @@ -804,7 +804,7 @@ void T_PUSH(ARM* cpu) if (!nregs) [[unlikely]] { - ReglessLDMSTM(cpu, false, 13, true, true, true, false); + ReglessLDMSTM(cpu, false, 13, true, true, true, false, true); return; } @@ -850,7 +850,7 @@ void T_POP(ARM* cpu) if (!(cpu->CurInstr & 0x1FF)) [[unlikely]] { - ReglessLDMSTM(cpu, true, 13, true, false, false, false); + ReglessLDMSTM(cpu, true, 13, true, false, false, false, true); return; } @@ -902,7 +902,7 @@ void T_STMIA(ARM* cpu) if (!(cpu->CurInstr & 0xFF)) [[unlikely]] { - ReglessLDMSTM(cpu, false, (cpu->CurInstr >> 8) & 0x7, true, false, false, false); + ReglessLDMSTM(cpu, false, (cpu->CurInstr >> 8) & 0x7, true, false, false, false, true); return; } @@ -938,7 +938,7 @@ void T_LDMIA(ARM* cpu) if (!(cpu->CurInstr & 0xFF)) [[unlikely]] { - ReglessLDMSTM(cpu, true, (cpu->CurInstr >> 8) & 0x7, true, false, false, false); + ReglessLDMSTM(cpu, true, (cpu->CurInstr >> 8) & 0x7, true, false, false, false, true); return; } From 7fb18b11552374df2cd51454cca7ce8fbdc583bc Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Mon, 23 Sep 2024 20:03:58 -0400 Subject: [PATCH 094/115] clean up code --- src/ARMInterpreter_LoadStore.cpp | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index e8e6accc..59b9bc30 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -450,30 +450,16 @@ void ReglessLDMSTM(ARM* cpu, const bool load, const u8 baseid, const bool writeb if (load) { u32 pc; - if (cpu->DataRead32(base, &pc)) - { - cpu->AddCycles_CDI(); - cpu->JumpTo(pc, usermode); // checkme can we restore cpsr? - } - else - { - cpu->AddCycles_CDI(); - ((ARMv5*)cpu)->DataAbort(); - return; - } + cpu->DataRead32(base, &pc); + + cpu->AddCycles_CDI(); + cpu->JumpTo(pc, usermode); } else { - if (!cpu->DataWrite32(base, cpu->R[15] + (thumb ? 2 : 4))) - { - cpu->AddCycles_CD(); - ((ARMv5*)cpu)->DataAbort(); - return; - } - else - { - cpu->AddCycles_CD(); - } + cpu->DataWrite32(base, cpu->R[15] + (thumb ? 2 : 4)); + + cpu->AddCycles_CD(); } } else From e1d4fbef750ce25e0b0c2c3f0f69bef7b5c79e85 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 24 Sep 2024 09:47:32 -0400 Subject: [PATCH 095/115] i can't reproduce this anymore --- src/ARM.cpp | 26 -------------------------- src/ARM.h | 3 --- 2 files changed, 29 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 6518b751..c194cc71 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -346,27 +346,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) NDS.MonitorARM9Jump(addr); } -void ARMv5::JumpTo8_16Bit(const u32 addr) -{ - // 8 and 16 loads (signed included) to pc - if (!(CP15Control & 0x1)) - { - // if the pu is disabled it behaves like a normal jump - JumpTo((CP15Control & (1<<15)) ? (addr & ~0x1) : addr); - } - else - { - if (addr & 0x3) - { - // if the pu is enabled it will always prefetch abort if not word aligned - // although it will still attempt (and fail) to enter thumb mode if enabled - if ((addr & 0x1) && !(CP15Control & (1<<15))) CPSR |= 0x20; - PrefetchAbort(); - } - else JumpTo(addr); - } -} - void ARMv4::JumpTo(u32 addr, bool restorecpsr) { if (restorecpsr) @@ -411,11 +390,6 @@ void ARMv4::JumpTo(u32 addr, bool restorecpsr) } } -void ARMv4::JumpTo8_16Bit(const u32 addr) -{ - JumpTo(addr & ~1); // checkme? -} - void ARM::RestoreCPSR() { u32 oldcpsr = CPSR; diff --git a/src/ARM.h b/src/ARM.h index 26080b51..e7156d72 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -75,7 +75,6 @@ public: virtual void FillPipeline() = 0; virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0; - virtual void JumpTo8_16Bit(u32 addr) = 0; void RestoreCPSR(); void Halt(u32 halt) @@ -244,7 +243,6 @@ public: void FillPipeline() override; void JumpTo(u32 addr, bool restorecpsr = false) override; - void JumpTo8_16Bit(const u32 addr) override; void PrefetchAbort(); void DataAbort(); @@ -388,7 +386,6 @@ public: void FillPipeline() override; void JumpTo(u32 addr, bool restorecpsr = false) override; - void JumpTo8_16Bit(const u32 addr) override; template void Execute(); From 3065141ed751778523876b92c54f9b89c33becec Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 24 Sep 2024 17:03:18 -0400 Subject: [PATCH 096/115] probably not faster --- src/ARM.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index c194cc71..ac3fe200 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -697,13 +697,12 @@ void ARMv5::Execute() { ARMInterpreter::A_BLX_IMM(this); } - else + else if ((CurInstr & 0x0FF000F0) == 0x01200070) { - if ((((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0)) == 0x127) - ARMInterpreter::A_BKPT(this); // always passes regardless of condition code - else - AddCycles_C(); + ARMInterpreter::A_BKPT(this); // always passes regardless of condition code } + else + AddCycles_C(); } // TODO optimize this shit!!! From a11208ec6db98722579c047a78717408db1463be Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:02:17 -0400 Subject: [PATCH 097/115] oops --- src/ARM.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ARM.cpp b/src/ARM.cpp index ac3fe200..f97c26e2 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -523,9 +523,11 @@ void ARM::TriggerIRQ() UpdateMode(oldcpsr, CPSR); R_IRQ[2] = oldcpsr; +#ifdef JIT_ENABLED if constexpr (mode == CPUExecuteMode::JIT) R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0); else +#endif R[14] = R[15] - (oldcpsr & 0x20 ? 0 : 4); JumpTo(ExceptionBase + 0x18); From 53b38c363fb820f97c4f44b8601540970f9017cf Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 10 Oct 2024 03:32:53 -0400 Subject: [PATCH 098/115] ok no it didn't lie to me --- src/ARMInterpreter.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 72d1e189..1f95c1f8 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -136,7 +136,8 @@ void A_MSR_IMM(ARM* cpu) } } - cpu->AddCycles_C(); + if ((cpu->Num != 1) && (cpu->CurInstr & (0x7<<16))) cpu->AddCycles_CI(2); + else cpu->AddCycles_C(); } void A_MSR_REG(ARM* cpu) @@ -196,7 +197,8 @@ void A_MSR_REG(ARM* cpu) } } - cpu->AddCycles_C(); + if ((cpu->Num != 1) && (cpu->CurInstr & (0x7<<16))) cpu->AddCycles_CI(2); + else cpu->AddCycles_C(); } void A_MRS(ARM* cpu) From 3870216fd06f634c2b0864feea5c4353ca94fd0b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 10 Oct 2024 03:53:51 -0400 Subject: [PATCH 099/115] correction: --- src/ARMInterpreter.cpp | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 1f95c1f8..2b14de73 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -98,7 +98,8 @@ void A_MSR_IMM(ARM* cpu) case 0x1A: case 0x1B: psr = &cpu->R_UND[2]; break; default: - cpu->AddCycles_C(); + if (cpu->Num != 1) cpu->AddCycles_C(); // arm 7 + else cpu->AddCycles_CI(2); // arm 9 return; } } @@ -135,8 +136,16 @@ void A_MSR_IMM(ARM* cpu) cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least } } - - if ((cpu->Num != 1) && (cpu->CurInstr & (0x7<<16))) cpu->AddCycles_CI(2); + + if (cpu->Num != 1) + { + if (cpu->CurInstr & (1<<22)) + { + cpu->AddCycles_CI(2); // spsr_fsxc + } + else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc + else cpu->AddCycles_C(); + } else cpu->AddCycles_C(); } @@ -159,7 +168,8 @@ void A_MSR_REG(ARM* cpu) case 0x1A: case 0x1B: psr = &cpu->R_UND[2]; break; default: - cpu->AddCycles_C(); + if (cpu->Num != 1) cpu->AddCycles_C(); // arm 7 + else cpu->AddCycles_CI(2); // arm 9 return; } } @@ -196,8 +206,16 @@ void A_MSR_REG(ARM* cpu) cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least } } - - if ((cpu->Num != 1) && (cpu->CurInstr & (0x7<<16))) cpu->AddCycles_CI(2); + + if (cpu->Num != 1) + { + if (cpu->CurInstr & (1<<22)) + { + cpu->AddCycles_CI(2); // spsr_fsxc + } + else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc + else cpu->AddCycles_C(); + } else cpu->AddCycles_C(); } From 93dce82b078a1df48fb2d74a6091300f5e807b37 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:48:17 -0400 Subject: [PATCH 100/115] implement cmp with "rd == 15" on arm9 cmp and friends with bits 12-15 set to 1 borrow characteristics from their legacy 26 bit p variants thumb version does nothing of note --- src/ARMInterpreter.cpp | 4 +- src/ARMInterpreter_ALU.cpp | 82 ++++++++++++++++++++++++-------------- 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 2b14de73..a04b6140 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -141,7 +141,7 @@ void A_MSR_IMM(ARM* cpu) { if (cpu->CurInstr & (1<<22)) { - cpu->AddCycles_CI(2); // spsr_fsxc + cpu->AddCycles_CI(2); // spsr } else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc else cpu->AddCycles_C(); @@ -211,7 +211,7 @@ void A_MSR_REG(ARM* cpu) { if (cpu->CurInstr & (1<<22)) { - cpu->AddCycles_CI(2); // spsr_fsxc + cpu->AddCycles_CI(2); // spsr } else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc else cpu->AddCycles_C(); diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 9305fc42..83fc1944 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -581,12 +581,12 @@ A_IMPLEMENT_ALU_OP(RSC,) #define A_TST(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a & b; \ - cpu->SetNZ(res & 0x80000000, \ - !res); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ if (cpu->Num == 1) \ { \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ if (cpu->CPSR & 0x20) \ @@ -595,7 +595,12 @@ A_IMPLEMENT_ALU_OP(RSC,) cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ } \ } \ - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST w/ rd == 15 on ARM9\n"); \ + else cpu->JumpTo(res & ~1, true); /* TSTP dna, doesn't update flags */ \ + } \ + else \ + { \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -605,12 +610,12 @@ A_IMPLEMENT_ALU_TEST(TST,_S) #define A_TEQ(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a ^ b; \ - cpu->SetNZ(res & 0x80000000, \ - !res); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ if (cpu->Num == 1) \ { \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ if (cpu->CPSR & 0x20) \ @@ -619,7 +624,12 @@ A_IMPLEMENT_ALU_TEST(TST,_S) cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ } \ } \ - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ w/ rd == 15 on ARM9\n"); \ + else cpu->JumpTo(res & ~1, true); /* TEQP dna, doesn't update flags */ \ + } \ + else \ + { \ + cpu->SetNZ(res & 0x80000000, \ + !res); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -629,14 +639,14 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) #define A_CMP(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a - b; \ - cpu->SetNZCV(res & 0x80000000, \ - !res, \ - CarrySub(a, b), \ - OverflowSub(a, b)); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ if (cpu->Num == 1) \ { \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CarrySub(a, b), \ + OverflowSub(a, b)); \ u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ if (cpu->CPSR & 0x20) \ @@ -645,7 +655,14 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S) cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ } \ } \ - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP w/ rd == 15 on ARM9\n"); \ + else cpu->JumpTo(res & ~1, true); /* CMPP dna, doesn't update flags */ \ + } \ + else \ + { \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CarrySub(a, b), \ + OverflowSub(a, b)); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -655,14 +672,14 @@ A_IMPLEMENT_ALU_TEST(CMP,) #define A_CMN(c) \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 res = a + b; \ - cpu->SetNZCV(res & 0x80000000, \ - !res, \ - CarryAdd(a, b), \ - OverflowAdd(a, b)); \ - if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* yes this instruction has a secret rd for some reason */ \ + if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ { \ if (cpu->Num == 1) \ { \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CarryAdd(a, b), \ + OverflowAdd(a, b)); \ u32 oldpsr = cpu->CPSR; \ cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \ if (cpu->CPSR & 0x20) \ @@ -671,7 +688,14 @@ A_IMPLEMENT_ALU_TEST(CMP,) cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \ } \ } \ - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN w/ rd == 15 on ARM9\n"); \ + else cpu->JumpTo(res & ~1, true); /* CMNP dna, doesn't update flags */ \ + } \ + else \ + { \ + cpu->SetNZCV(res & 0x80000000, \ + !res, \ + CarryAdd(a, b), \ + OverflowAdd(a, b)); \ } \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); @@ -1625,20 +1649,18 @@ void T_CMP_HIREG(ARM* cpu) !res, CarrySub(a, b), OverflowSub(a, b)); - if (rd == 15) [[unlikely]] + + if ((cpu->Num == 1) && (rd == 15)) { - if (cpu->Num == 1) + u32 oldpsr = cpu->CPSR; + cpu->RestoreCPSR(); // ARM7TDMI restores cpsr and does ___not___ flush the pipeline. + if (!(cpu->CPSR & 0x20)) { - u32 oldpsr = cpu->CPSR; - cpu->RestoreCPSR(); // ARM7TDMI restores cpsr and does ___not___ flush the pipeline. - if (!(cpu->CPSR & 0x20)) - { - Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); - cpu->CPSR |= 0x20; // keep it from crashing the emulator at least - } + Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); + cpu->CPSR |= 0x20; // keep it from crashing the emulator at least } - else Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP HIREG w/ rd == 15 on ARM9\n"); } + cpu->AddCycles_C(); } From 787d0c9afcd963380eb364b72ac71e7012d85689 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:09:07 -0400 Subject: [PATCH 101/115] mrc r15 updates flags also my prior implementation made mrc w/ r15 raise an exception by accident oops! --- src/ARMInterpreter.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index a04b6140..82dc6876 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -297,11 +297,17 @@ void A_MRC(ARM* cpu) u32 cpinfo = (cpu->CurInstr >> 5) & 0x7; u32 rd = (cpu->CurInstr>>12) & 0xF; - if (cpu->Num==0 && cp==15 && rd!=15) + if (cpu->Num==0 && cp==15) { - cpu->R[rd] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo); + if (rd != 15) cpu->R[rd] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo); + else + { + // r15 updates the top 4 bits of the cpsr, done to "allow for conditional branching based on coprocessor status" + u32 flags = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo) & 0xF0000000; + cpu->CPSR = (cpu->CPSR & ~0xF0000000) | flags; + } } - else if (cpu->Num==1 && cp==14 && rd!=15) + else if (cpu->Num==1 && cp==14) { Log(LogLevel::Debug, "MRC p14,%d,%d,%d on ARM7\n", cn, cm, cpinfo); } From e0e78a2bc8c633680e1936f66f906dec97a8474d Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 12 Oct 2024 11:10:06 -0400 Subject: [PATCH 102/115] make empty r-list instructions a bit nicer pass bools as a single u8 instead and combine thumb and restore cpsr flags since they're mutually exclusive --- src/ARMInterpreter_LoadStore.cpp | 50 ++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 59b9bc30..84203310 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -434,30 +434,40 @@ void A_SWPB(ARM* cpu) SWP(cpu); } -void ReglessLDMSTM(ARM* cpu, const bool load, const u8 baseid, const bool writeback, const bool decrement, bool preinc, const bool usermode, const bool thumb) +void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags) { + enum // flags + { + load = (1<<0), + writeback = (1<<1), + decrement = (1<<2), + preinc = (1<<3), + restoreorthumb = (1<<4), // specifies restore cpsr for loads, thumb instr for stores + }; + if (cpu->Num == 1) { u32 base = cpu->R[baseid]; + bool flagpreinc = flags & preinc; - if (decrement) + if (flags & decrement) { - preinc = !preinc; + flagpreinc = !flagpreinc; base -= 0x40; } - if (preinc) base+=4; + if (flagpreinc) base+=4; - if (load) + if (flags & load) { u32 pc; cpu->DataRead32(base, &pc); cpu->AddCycles_CDI(); - cpu->JumpTo(pc, usermode); + cpu->JumpTo(pc, flags & restoreorthumb); } else { - cpu->DataWrite32(base, cpu->R[15] + (thumb ? 2 : 4)); + cpu->DataWrite32(base, cpu->R[15] + ((flags & restoreorthumb) ? 2 : 4)); cpu->AddCycles_CD(); } @@ -467,10 +477,10 @@ void ReglessLDMSTM(ARM* cpu, const bool load, const u8 baseid, const bool writeb cpu->AddCycles_C(); // checkme } - if (writeback) + if (flags & writeback) { - if (decrement) cpu->R[baseid] -= 0x40; - else cpu->R[baseid] += 0x40; + if (flags & decrement) cpu->R[baseid] -= 0x40; + else cpu->R[baseid] += 0x40; } } @@ -486,7 +496,11 @@ void A_LDM(ARM* cpu) if (!(cpu->CurInstr & 0xFFFF)) [[unlikely]] { - ReglessLDMSTM(cpu, true, baseid, cpu->CurInstr & (1<<21), !(cpu->CurInstr & (1<<23)), preinc, cpu->CurInstr & (1<<22), false); + EmptyRListLDMSTM(cpu, baseid, ((1 << 0) | // load + (((cpu->CurInstr >> 21) & 1) << 1) | // writeback + ((!(cpu->CurInstr & (1<<23))) << 2) | // decrement + ((preinc >> 24) << 3) | // preinc + (((cpu->CurInstr >> 22) & 1) << 4))); // restore return; } @@ -592,7 +606,11 @@ void A_STM(ARM* cpu) if (!(cpu->CurInstr & 0xFFFF)) [[unlikely]] { - ReglessLDMSTM(cpu, false, baseid, cpu->CurInstr & (1<<21), !(cpu->CurInstr & (1<<23)), preinc, false, false); + EmptyRListLDMSTM(cpu, baseid, ((0 << 0) | // load + (((cpu->CurInstr >> 21) & 1) << 1) | // writeback + ((!(cpu->CurInstr & (1<<23))) << 2) | // decrement + ((preinc >> 24) << 3) | // preinc + (0 << 4))); // thumb return; } @@ -790,7 +808,7 @@ void T_PUSH(ARM* cpu) if (!nregs) [[unlikely]] { - ReglessLDMSTM(cpu, false, 13, true, true, true, false, true); + EmptyRListLDMSTM(cpu, 13, 0b11110); return; } @@ -836,7 +854,7 @@ void T_POP(ARM* cpu) if (!(cpu->CurInstr & 0x1FF)) [[unlikely]] { - ReglessLDMSTM(cpu, true, 13, true, false, false, false, true); + EmptyRListLDMSTM(cpu, 13, 0b00011); return; } @@ -888,7 +906,7 @@ void T_STMIA(ARM* cpu) if (!(cpu->CurInstr & 0xFF)) [[unlikely]] { - ReglessLDMSTM(cpu, false, (cpu->CurInstr >> 8) & 0x7, true, false, false, false, true); + EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b10010); return; } @@ -924,7 +942,7 @@ void T_LDMIA(ARM* cpu) if (!(cpu->CurInstr & 0xFF)) [[unlikely]] { - ReglessLDMSTM(cpu, true, (cpu->CurInstr >> 8) & 0x7, true, false, false, false, true); + EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b00011); return; } From 5f003eb967bfe5a4571e6830462f5e167dcf83f8 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 15 Oct 2024 20:23:03 -0400 Subject: [PATCH 103/115] fix builds with jit disabled --- src/ARM.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index f97c26e2..6ac989af 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -595,8 +595,11 @@ void ARMv5::Execute() Halted = 0; if (NDS.IME[0] & 0x1) { +#ifdef JIT_ENABLED if constexpr (mode == CPUExecuteMode::JIT) TriggerIRQ(); - else IRQ = 1; + else +#endif + IRQ = 1; } } else From 3c7db9b21f232f8dece502d20a9dba546fea217c Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Tue, 5 Nov 2024 21:56:19 -0500 Subject: [PATCH 104/115] correct thumb multiply timings --- src/ARMInterpreter_ALU.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 83fc1944..504a9c21 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -1583,10 +1583,10 @@ void T_MUL_REG(ARM* cpu) else { cpu->SetC(0); // carry flag destroyed, they say. whatever that means... - if (a & 0xFF000000) cycles += 4; - else if (a & 0x00FF0000) cycles += 3; - else if (a & 0x0000FF00) cycles += 2; - else cycles += 1; + if ((a & 0xFFFFFF00) == 0x00000000 || (a & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; + else if ((a & 0xFFFF0000) == 0x00000000 || (a & 0xFFFF0000) == 0xFFFF0000) cycles = 2; + else if ((a & 0xFF000000) == 0x00000000 || (a & 0xFF000000) == 0xFF000000) cycles = 3; + else cycles = 4; } cpu->AddCycles_CI(cycles); } From 3d49f5f2560084dbc70b1df780ad12cf24e5b97f Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Wed, 6 Nov 2024 00:18:29 -0500 Subject: [PATCH 105/115] arm7 muls carry flag emulation. --- src/ARMInterpreter_ALU.cpp | 19 ++-- src/ARMInterpreter_MultiplySuperLLE.h | 136 ++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 9 deletions(-) create mode 100644 src/ARMInterpreter_MultiplySuperLLE.h diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 504a9c21..72992f0f 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -19,6 +19,7 @@ #include #include "ARM.h" #include "NDS.h" +#include "ARMInterpreter_MultiplySuperLLE.h" namespace melonDS::ARMInterpreter { @@ -854,7 +855,6 @@ void A_MUL(ARM* cpu) { cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -866,6 +866,7 @@ void A_MUL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3; else cycles = 4; + if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, 0, cycles==4)); } cpu->AddCycles_CI(cycles); @@ -886,7 +887,6 @@ void A_MLA(ARM* cpu) { cpu->SetNZ(res & 0x80000000, !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -898,6 +898,7 @@ void A_MLA(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, rn, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -919,7 +920,6 @@ void A_UMULL(ARM* cpu) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -931,6 +931,7 @@ void A_UMULL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(0, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -955,7 +956,6 @@ void A_UMLAL(ARM* cpu) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -967,6 +967,7 @@ void A_UMLAL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(rd, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -988,7 +989,6 @@ void A_SMULL(ARM* cpu) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -1000,6 +1000,7 @@ void A_SMULL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(0, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -1024,7 +1025,6 @@ void A_SMLAL(ARM* cpu) { cpu->SetNZ((u32)(res >> 63ULL), !res); - if (cpu->Num==1) cpu->SetC(0); } u32 cycles; @@ -1036,6 +1036,7 @@ void A_SMLAL(ARM* cpu) else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else cycles = 5; + if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(rd, rm, rs, cycles==5)); } cpu->AddCycles_CI(cycles); @@ -1575,18 +1576,18 @@ void T_MUL_REG(ARM* cpu) cpu->SetNZ(res & 0x80000000, !res); - s32 cycles = 0; + s32 cycles; if (cpu->Num == 0) { - cycles += 3; + cycles = 3; } else { - cpu->SetC(0); // carry flag destroyed, they say. whatever that means... if ((a & 0xFFFFFF00) == 0x00000000 || (a & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1; else if ((a & 0xFFFF0000) == 0x00000000 || (a & 0xFFFF0000) == 0xFFFF0000) cycles = 2; else if ((a & 0xFF000000) == 0x00000000 || (a & 0xFF000000) == 0xFF000000) cycles = 3; else cycles = 4; + cpu->SetC(MULSCarry(b, a, 0, cycles==4)); // carry flag destroyed, they say. whatever that means... } cpu->AddCycles_CI(cycles); } diff --git a/src/ARMInterpreter_MultiplySuperLLE.h b/src/ARMInterpreter_MultiplySuperLLE.h new file mode 100644 index 00000000..21b17bbc --- /dev/null +++ b/src/ARMInterpreter_MultiplySuperLLE.h @@ -0,0 +1,136 @@ +#ifndef ARMINTERPRETER_MULTIPLYSUPERLLE_H +#define ARMINTERPRETER_MULTIPLYSUPERLLE_H + +#include "types.h" + +using namespace melonDS; + +/* + Copyright (c) 2024 zaydlang + + This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. + If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. +*/ + + + + +// code taken from: (also features a few alternative implementations that could maybe be worth looking at?) +// https://github.com/calc84maniac/multiplication-algorithm/blob/master/impl_opt.h +// based on research that can be found here: https://bmchtech.github.io/post/multiply/ + +// the code in this file is dedicated to handling the calculation of the carry flag for multiplication (S variant) instructions on the ARM7TDMI. + + +// Takes a multiplier between -0x01000000 and 0x00FFFFFF, cycles between 0 and 2 +static inline bool booths_multiplication32_opt(u32 multiplicand, u32 multiplier, u32 accumulator) { + // Set the low bit of the multiplicand to cause negation to invert the upper bits, this bit can't propagate to bit 31 + multiplicand |= 1; + + // Optimized first iteration + u32 booth = (s32)(multiplier << 31) >> 31; + u32 carry = booth * multiplicand; + // Pre-populate accumulator for output + u32 output = accumulator; + + u32 sum = output + carry; + int shift = 29; + do { + for (int i = 0; i < 4; i++, shift -= 2) { + // Get next booth factor (-2 to 2, shifted left by 30-shift) + u32 next_booth = (s32)(multiplier << shift) >> shift; + u32 factor = next_booth - booth; + booth = next_booth; + // Get scaled value of booth addend + u32 addend = multiplicand * factor; + // Combine the addend with the CSA + // Not performing any masking seems to work because the lower carries can't propagate to bit 31 + output ^= carry ^ addend; + sum += addend; + carry = sum - output; + } + } while (booth != multiplier); + + return carry >> 31; +} + +// Takes a multiplicand shifted right by 6 and a multiplier shifted right by 26 (zero or sign extended) +static inline bool booths_multiplication64_opt(u32 multiplicand, u32 multiplier, u32 accum_hi) { + // Skipping the first 14 iterations seems to work because the lower carries can't propagate to bit 63 + // This means only magic bits 62-61 are needed (which requires decoding 3 booth chunks), + // and only the last two booth iterations are needed + + // Set the low bit of the multiplicand to cause negation to invert the upper bits + multiplicand |= 1; + + // Pre-populate magic bit 61 for carry + u32 carry = ~accum_hi & UINT32_C(0x20000000); + // Pre-populate magic bits 63-60 for output (with carry magic pre-added in) + u32 output = accum_hi - UINT32_C(0x08000000); + + // Get factors from the top 3 booth chunks + u32 booth0 = (s32)(multiplier << 27) >> 27; + u32 booth1 = (s32)(multiplier << 29) >> 29; + u32 booth2 = (s32)(multiplier << 31) >> 31; + u32 factor0 = multiplier - booth0; + u32 factor1 = booth0 - booth1; + u32 factor2 = booth1 - booth2; + + // Get scaled value of the 3rd top booth addend + u32 addend = multiplicand * factor2; + // Finalize bits 61-60 of output magic using its sign + output -= addend & UINT32_C(0x10000000); + // Get scaled value of the 2nd top booth addend + addend = multiplicand * factor1; + // Finalize bits 63-62 of output magic using its sign + output -= addend & UINT32_C(0x40000000); + + // Get the carry from the CSA in bit 61 and propagate it to bit 62, which is not processed in this iteration + u32 sum = output + (addend & UINT32_C(0x20000000)); + // Subtract out the carry magic to get the actual output magic + output -= carry; + + // Get scaled value of the 1st top booth addend + addend = multiplicand * factor0; + // Add to bit 62 and propagate the carry + sum += addend & UINT32_C(0x40000000); + + // Cancel out the output magic bit 63 to get the carry bit 63 + return (sum ^ output) >> 31; +} + + +// also for MLAS and MUL (thumb ver.) +inline bool MULSCarry(s32 rm, s32 rs, u32 rn, bool lastcycle) +{ + if (lastcycle) + return (rs >> 30) == -2; + else + return booths_multiplication32_opt(rm, rs, rn); +} + +// also for UMLALS +inline bool UMULLSCarry(u64 rd, u32 rm, u32 rs, bool lastcycle) +{ + if (lastcycle) + return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32); + else + return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF); +} + +// also for SMLALS +inline bool SMULLSCarry(u64 rd, s32 rm, s32 rs, bool lastcycle) +{ + if (lastcycle) + return booths_multiplication64_opt(rm >> 6, rs >> 26, rd >> 32); + else + return booths_multiplication32_opt(rm, rs, rd & 0xFFFFFFFF); +} + +#endif From ef5de6091b903bac7a0d4f34673a69efbea27906 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:16:39 -0500 Subject: [PATCH 106/115] t blx long with bit 0 set should raise an exception fixes a bug with gbarunner3 --- src/ARMInterpreter_Branch.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp index 623be41a..88b14ab7 100644 --- a/src/ARMInterpreter_Branch.cpp +++ b/src/ARMInterpreter_Branch.cpp @@ -104,6 +104,9 @@ void T_BL_LONG_1(ARM* cpu) void T_BL_LONG_2(ARM* cpu) { + if ((cpu->CurInstr & 0x1801) == 0x0801) // "BLX" with bit 0 set is an unvalid instruction. + return T_UNK(cpu); // TODO: Check ARM7 for exceptions + s32 offset = (cpu->CurInstr & 0x7FF) << 1; u32 pc = cpu->R[14] + offset; cpu->R[14] = (cpu->R[15] - 2) | 1; From 5091061a39d307f9dd92ef0aa5d808fb0900121b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 7 Nov 2024 20:16:19 -0500 Subject: [PATCH 107/115] improve accuracy of prefetch abort handling slightly prefetch aborts should be handled on executing an instruction by a flag set when the instruction is fetched --- src/ARM.cpp | 8 ++++---- src/ARM.h | 6 +++--- src/ARMJIT.cpp | 2 +- src/CP15.cpp | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 682ce9ff..7f5d2e86 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -222,7 +222,7 @@ void ARM::DoSavestate(Savestate* file) file->VarArray(R_ABT, 3*sizeof(u32)); file->VarArray(R_IRQ, 3*sizeof(u32)); file->VarArray(R_UND, 3*sizeof(u32)); - file->Var32(&CurInstr); + file->Var64(&CurInstr); #ifdef JIT_ENABLED if (file->Saving && NDS.IsJITEnabled()) { @@ -232,7 +232,7 @@ void ARM::DoSavestate(Savestate* file) FillPipeline(); } #endif - file->VarArray(NextInstr, 2*sizeof(u32)); + file->VarArray(NextInstr, 2*sizeof(u64)); file->Var32(&ExceptionBase); @@ -667,7 +667,7 @@ void ARMv5::Execute() if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); - else if (!(PU_Map[(R[15]-4)>>12] & 0x04)) [[unlikely]] // handle aborted instructions + else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions { PrefetchAbort(); } @@ -690,7 +690,7 @@ void ARMv5::Execute() if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); - else if (!(PU_Map[(R[15]-8)>>12] & 0x04)) [[unlikely]] // handle aborted instructions + else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions { PrefetchAbort(); } diff --git a/src/ARM.h b/src/ARM.h index e7156d72..f4b3b53f 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -177,8 +177,8 @@ public: u32 R_ABT[3]; u32 R_IRQ[3]; u32 R_UND[3]; - u32 CurInstr; - u32 NextInstr[2]; + u64 CurInstr; + u64 NextInstr[2]; u32 ExceptionBase; @@ -251,7 +251,7 @@ public: void Execute(); // all code accesses are forced nonseq 32bit - u32 CodeRead32(u32 addr, bool branch); + u64 CodeRead32(u32 addr, bool branch); bool DataRead8(u32 addr, u32* val) override; bool DataRead16(u32 addr, u32* val) override; diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 1ebcce8e..8bf509e9 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -588,7 +588,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept u32 numWriteAddrs = 0, writeAddrsTranslated = 0; cpu->FillPipeline(); - u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]}; + u32 nextInstr[2] = {(u32)cpu->NextInstr[0], (u32)cpu->NextInstr[1]}; u32 nextInstrAddr[2] = {blockAddr, r15}; JIT_DEBUGPRINT("start block %x %08x (%x)\n", blockAddr, cpu->CPSR, localAddr); diff --git a/src/CP15.cpp b/src/CP15.cpp index 5bffb185..fba73bda 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -771,14 +771,14 @@ u32 ARMv5::CP15Read(u32 id) const // TCM are handled here. // TODO: later on, handle PU, and maybe caches -u32 ARMv5::CodeRead32(u32 addr, bool branch) +u64 ARMv5::CodeRead32(u32 addr, bool branch) { // prefetch abort // the actual exception is not raised until the aborted instruction is executed if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]] { CodeCycles = 1; - return 0; + return ((u64)1<<63); } if (addr < ITCMSize) From 60a819c1ed993aaf1d9ab16386d29f70596935ef Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 8 Nov 2024 01:02:36 -0500 Subject: [PATCH 108/115] correct handling of T bit changes w/o pipeline flush on arm9 --- src/ARMInterpreter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 82dc6876..ff79597e 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -129,7 +129,7 @@ void A_MSR_IMM(ARM* cpu) if (cpu->CPSR & 0x20) [[unlikely]] { - if (cpu->Num == 0) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this + if (cpu->Num == 0) cpu->R[15] += 2; // pc should actually increment by 4 one more time after switching to thumb mode without a pipeline flush, this gets the same effect. else { Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); @@ -199,7 +199,7 @@ void A_MSR_REG(ARM* cpu) if (cpu->CPSR & 0x20) [[unlikely]] { - if (cpu->Num == 0) cpu->NextInstr[1] &= 0xFFFF; // checkme: probably not the right way to handle this + if (cpu->Num == 0) cpu->R[15] += 2; // pc should actually increment by 4 one more time after switching to thumb mode without a pipeline flush, this gets the same effect. else { Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n"); From 676f471ebe788120efebc69271f58d5e2af2564b Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 8 Nov 2024 01:36:14 -0500 Subject: [PATCH 109/115] fix edge case with thumb prefetch aborts --- src/ARM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 7f5d2e86..beefc132 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -667,7 +667,7 @@ void ARMv5::Execute() if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); - else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions + else if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions { PrefetchAbort(); } From 9f8cf8dad20d9c65fbf458ff610ec37361e6f3fc Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 9 Nov 2024 14:49:34 -0500 Subject: [PATCH 110/115] ldm base writeback fails with r15 --- src/ARMInterpreter_LoadStore.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 9dc14ea4..77628d7d 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -568,7 +568,7 @@ void A_LDM(ARM* cpu) } // writeback to base - if (cpu->CurInstr & (1<<21)) + if (cpu->CurInstr & (1<<21) && (baseid != 15)) { // post writeback if (cpu->CurInstr & (1<<23)) @@ -624,7 +624,7 @@ void A_STM(ARM* cpu) base -= 4; } - if (cpu->CurInstr & (1<<21)) + if ((cpu->CurInstr & (1<<21)) && (baseid != 15)) cpu->R[baseid] = base; preinc = !preinc; @@ -681,7 +681,7 @@ void A_STM(ARM* cpu) return; } - if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) + if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21)) && (baseid != 15)) cpu->R[baseid] = base; From e4dd913ba3a1151fca3cba1ab76ad386a85eef58 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 9 Nov 2024 15:38:08 -0500 Subject: [PATCH 111/115] arm7 RORs unaligned ldr(s)h ty mgba discord --- src/ARMInterpreter_LoadStore.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 77628d7d..80f82755 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -100,7 +100,10 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) return; } if constexpr (size == 8 && signror) val = (s32)(s8)val; - if constexpr (size == 16 && signror) val = (s32)(s16)val; + + if constexpr (size == 16) if (cpu->Num == 1) val = ROR(val, ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7 + if constexpr (size == 16 && signror) val = (s32)(((cpu->Num == 1) && (addr & 1)) ? (s8)val : (s16)val); // sign extend like a ldrsb if we ror'd the value. + if constexpr (size == 32 && signror) val = ROR(val, ((addr&0x3)<<3)); if constexpr (writeback != Writeback::None) cpu->R[rn] += offset; From bdc315198f302ed03fdaf435e58e42b1095e4366 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 9 Nov 2024 16:12:19 -0500 Subject: [PATCH 112/115] T_LDR_SPREL does ROR + misc cleanup --- src/ARMInterpreter_LoadStore.cpp | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 80f82755..4cd9a8fb 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -66,7 +66,7 @@ enum class Writeback Trans, }; -template +template void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) { static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); @@ -99,14 +99,21 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) ((ARMv5*)cpu)->DataAbort(); return; } - if constexpr (size == 8 && signror) val = (s32)(s8)val; + if constexpr (size == 8 && signextend) val = (s32)(s8)val; - if constexpr (size == 16) if (cpu->Num == 1) val = ROR(val, ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7 - if constexpr (size == 16 && signror) val = (s32)(((cpu->Num == 1) && (addr & 1)) ? (s8)val : (s16)val); // sign extend like a ldrsb if we ror'd the value. + if constexpr (size == 16) + { + if (cpu->Num == 1) val = ROR(val, ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7 + { + if constexpr (signextend) val = (s32)((addr&0x1) ? (s8)val : (s16)val); // sign extend like a ldrsb if we ror'd the value. + } + else if constexpr (signextend) val = (s32)(s16)val; + } - if constexpr (size == 32 && signror) val = ROR(val, ((addr&0x3)<<3)); + if constexpr (size == 32) val = ROR(val, ((addr&0x3)<<3)); - if constexpr (writeback != Writeback::None) cpu->R[rn] += offset; + + if constexpr (writeback != Writeback::None) cpu->R[rn] = addr; if (rd == 15) { @@ -173,12 +180,12 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) else StoreSingle<8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDR \ - if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDR_POST \ - if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ - else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); + if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ + else LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); #define A_LDRB \ if (cpu->CurInstr & (1<<21)) LoadSingle(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ @@ -723,7 +730,7 @@ void T_STRB_REG(ARM* cpu) void T_LDR_REG(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); } void T_LDRB_REG(ARM* cpu) @@ -760,7 +767,7 @@ void T_STR_IMM(ARM* cpu) void T_LDR_IMM(ARM* cpu) { - LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); + LoadSingle(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); } void T_STRB_IMM(ARM* cpu) From ec241a822428392d4558245b9fc3bdc6aed148e5 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 9 Nov 2024 16:18:48 -0500 Subject: [PATCH 113/115] im smrat :D --- src/ARMInterpreter_LoadStore.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 4cd9a8fb..97bef0b0 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -99,12 +99,14 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) ((ARMv5*)cpu)->DataAbort(); return; } + if constexpr (size == 8 && signextend) val = (s32)(s8)val; if constexpr (size == 16) { - if (cpu->Num == 1) val = ROR(val, ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7 + if (cpu->Num == 1) { + val = ROR(val, ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7 if constexpr (signextend) val = (s32)((addr&0x1) ? (s8)val : (s16)val); // sign extend like a ldrsb if we ror'd the value. } else if constexpr (signextend) val = (s32)(s16)val; From fce0555a09283f7d5fdf6f195b4a9d8d2088b484 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sat, 9 Nov 2024 22:07:17 -0500 Subject: [PATCH 114/115] slightly fix error in writeback handling --- src/ARMInterpreter_LoadStore.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 97bef0b0..159fc86f 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -115,6 +115,7 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) if constexpr (size == 32) val = ROR(val, ((addr&0x3)<<3)); + if constexpr (writeback >= Writeback::Post) addr += offset; if constexpr (writeback != Writeback::None) cpu->R[rn] = addr; if (rd == 15) @@ -160,8 +161,9 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) ((ARMv5*)cpu)->DataAbort(); return; } - - if constexpr (writeback != Writeback::None) cpu->R[rn] += offset; + + if constexpr (writeback >= Writeback::Post) addr += offset; + if constexpr (writeback != Writeback::None) cpu->R[rn] = addr; } From 9d92b8708a2b805dbefe75fbf59612ec996d9f8d Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Sun, 10 Nov 2024 02:56:16 -0500 Subject: [PATCH 115/115] r15 writeback is very weird with ldr/str --- src/ARMInterpreter_Branch.cpp | 2 +- src/ARMInterpreter_LoadStore.cpp | 30 ++++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/ARMInterpreter_Branch.cpp b/src/ARMInterpreter_Branch.cpp index 88b14ab7..5731a0b6 100644 --- a/src/ARMInterpreter_Branch.cpp +++ b/src/ARMInterpreter_Branch.cpp @@ -104,7 +104,7 @@ void T_BL_LONG_1(ARM* cpu) void T_BL_LONG_2(ARM* cpu) { - if ((cpu->CurInstr & 0x1801) == 0x0801) // "BLX" with bit 0 set is an unvalid instruction. + if ((cpu->CurInstr & 0x1801) == 0x0801) // "BLX" with bit 0 set is an undefined instruction. return T_UNK(cpu); // TODO: Check ARM7 for exceptions s32 offset = (cpu->CurInstr & 0x7FF) << 1; diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 159fc86f..a2c9d7cc 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -67,7 +67,7 @@ enum class Writeback }; template -void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) +void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset) { static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); @@ -116,7 +116,19 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) if constexpr (writeback >= Writeback::Post) addr += offset; - if constexpr (writeback != Writeback::None) cpu->R[rn] = addr; + if constexpr (writeback != Writeback::None) + { + if (rn != 15) [[likely]] // r15 writeback fails on arm9 + { + cpu->R[rn] = addr; + } + else if (cpu->Num == 1) // arm 7 + { + // note that at no point does it actually write the value it loaded to a register... + cpu->JumpTo((addr+4) & ~1); + return; + } + } if (rd == 15) { @@ -127,7 +139,7 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) } template -void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) +void StoreSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset) { static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); @@ -163,7 +175,17 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) } if constexpr (writeback >= Writeback::Post) addr += offset; - if constexpr (writeback != Writeback::None) cpu->R[rn] = addr; + if constexpr (writeback != Writeback::None) + { + if (rn != 15) [[likely]] // r15 writeback fails on arm9 + { + cpu->R[rn] = addr; + } + else if (cpu->Num == 1) // arm 7 + { + cpu->JumpTo(addr & ~1); + } + } }