track interlock cycles for load instructions

This commit is contained in:
Jaklyy 2024-06-14 00:51:55 -04:00
parent 42218106b0
commit 5a174a2ce3
3 changed files with 143 additions and 33 deletions

View File

@ -1314,6 +1314,9 @@ void ARMv4::AddCycles_CD()
Cycles += numC + numD; Cycles += numC + numD;
} }
} }
// Timestamp source for the ARMv5 core: reports NDS.ARM9Timestamp.
u64 ARMv5::Timestamp()
{
    return NDS.ARM9Timestamp;
}
// Timestamp source for the ARMv4 core: reports NDS.ARM7Timestamp.
u64 ARMv4::Timestamp()
{
    return NDS.ARM7Timestamp;
}
u8 ARMv5::BusRead8(u32 addr) u8 ARMv5::BusRead8(u32 addr)
{ {

View File

@ -143,6 +143,24 @@ public:
virtual void AddCycles_CDI() = 0; virtual void AddCycles_CDI() = 0;
virtual void AddCycles_CD() = 0; virtual void AddCycles_CD() = 0;
inline void AddCycles_L(const u8 reg1)
{
Cycles += InterlockTimestamp[reg1];
}
inline void AddCycles_L(const u8 reg1, const u8 reg2)
{
Cycles += std::max(InterlockTimestamp[reg1], InterlockTimestamp[reg2]);
}
// Must be called after all of an instruction's cycles are calculated!!!
inline void SetCycles_L(const u8 reg, const u8 cycles, const u8 type)
{
InterlockTimestamp[reg] = cycles + Timestamp() + Cycles;
}
// Absolute timestamp of this core; SetCycles_L uses it as the time base for InterlockTimestamp[].
// ARMv5 returns NDS.ARM9Timestamp and ARMv4 returns NDS.ARM7Timestamp.
virtual u64 Timestamp() = 0;
void CheckGdbIncoming(); void CheckGdbIncoming();
u32 Num; u32 Num;
@ -179,6 +197,15 @@ public:
MemRegion CodeMem; MemRegion CodeMem;
// Kind of interlock attached to a register; passed as the `type` argument of SetCycles_L.
enum InterlockType
{
    ILT_Norm, // == 0; the kind used by the load instructions
    ILT_Mul,  // == 1; NOTE(review): presumably multiply results — confirm, no user visible here
};
// Per-register interlock bookkeeping, indexed by register number (0-15).
// NOTE(review): presumably holds one InterlockType enumerator per register — confirm intended use.
u8 InterlockType[16];
// Absolute time (same base as Timestamp() + Cycles) at which each register's value becomes available; written by SetCycles_L.
u64 InterlockTimestamp[16];
#ifdef JIT_ENABLED #ifdef JIT_ENABLED
u32 FastBlockLookupStart, FastBlockLookupSize; u32 FastBlockLookupStart, FastBlockLookupSize;
u64* FastBlockLookup; u64* FastBlockLookup;
@ -299,6 +326,8 @@ public:
// Cycles += numC + numD; // Cycles += numC + numD;
} }
u64 Timestamp() override;
void GetCodeMemRegion(u32 addr, MemRegion* region); void GetCodeMemRegion(u32 addr, MemRegion* region);
void CP15Reset(); void CP15Reset();
@ -413,6 +442,8 @@ public:
void AddCycles_CI(s32 num) override; void AddCycles_CI(s32 num) override;
void AddCycles_CDI() override; void AddCycles_CDI() override;
void AddCycles_CD() override; void AddCycles_CD() override;
u64 Timestamp() override;
protected: protected:
u8 BusRead8(u32 addr) override; u8 BusRead8(u32 addr) override;
u16 BusRead16(u32 addr) override; u16 BusRead16(u32 addr) override;

View File

@ -115,6 +115,7 @@ namespace melonDS::ARMInterpreter
else \ else \
{ \ { \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (offset & 3) ? 2 : 1, cpu->ILT_Norm); \
} }
// TODO: user mode // TODO: user mode
@ -133,6 +134,7 @@ namespace melonDS::ARMInterpreter
else \ else \
{ \ { \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (addr & 3) ? 2 : 1, cpu->ILT_Norm); \
} }
#define A_LDRB \ #define A_LDRB \
@ -141,8 +143,13 @@ namespace melonDS::ARMInterpreter
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (dataabort) return; \ if (dataabort) return; \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \ if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
}
// TODO: user mode // TODO: user mode
#define A_LDRB_POST \ #define A_LDRB_POST \
@ -151,8 +158,13 @@ namespace melonDS::ARMInterpreter
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (dataabort) return; \ if (dataabort) return; \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \ cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
}
@ -261,11 +273,17 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 r = (cpu->CurInstr>>12) & 0xF; \ u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \ if (r&1) { A_UNK(cpu); return; } \
if (!cpu->DataRead32 (offset, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \ if (!cpu->DataRead32 (offset, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \
u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \ u32 val; bool dataabort = !cpu->DataRead32S(offset+4, &val); \
if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else cpu->R[r+1] = val; \
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; if (dataabort) return; \
if (r == 14) \
cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else \
{ \
cpu->R[r+1] = val; \
cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \
} \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
#define A_LDRD_POST \ #define A_LDRD_POST \
if (cpu->Num != 0) return; \ if (cpu->Num != 0) return; \
@ -273,10 +291,16 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 r = (cpu->CurInstr>>12) & 0xF; \ u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \ if (r&1) { A_UNK(cpu); return; } \
if (!cpu->DataRead32 (addr, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \ if (!cpu->DataRead32 (addr, &cpu->R[r])) {cpu->AddCycles_CDI(); return;} \
u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \ u32 val; bool dataabort = !cpu->DataRead32S(addr+4, &val); \
if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else cpu->R[r+1] = val; \
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (dataabort) return; \
if (r == 14) \
cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else \
{ \
cpu->R[r+1] = val; \
cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \
} \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_STRD \ #define A_STRD \
@ -308,8 +332,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \ u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (dataabort) return; \ if (dataabort) return; \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
#define A_LDRH_POST \ #define A_LDRH_POST \
@ -317,8 +346,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \ u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (dataabort) return; \ if (dataabort) return; \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_LDRSB \ #define A_LDRSB \
@ -327,8 +361,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (dataabort) return; \ if (dataabort) return; \
val = (s32)(s8)val; \ val = (s32)(s8)val; \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
#define A_LDRSB_POST \ #define A_LDRSB_POST \
@ -337,8 +376,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (dataabort) return; \ if (dataabort) return; \
val = (s32)(s8)val; \ val = (s32)(s8)val; \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_LDRSH \ #define A_LDRSH \
@ -347,8 +391,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (dataabort) return; \ if (dataabort) return; \
val = (s32)(s16)val; \ val = (s32)(s16)val; \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
#define A_LDRSH_POST \ #define A_LDRSH_POST \
@ -357,8 +406,13 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
cpu->AddCycles_CDI(); \ cpu->AddCycles_CDI(); \
if (dataabort) return; \ if (dataabort) return; \
val = (s32)(s16)val; \ val = (s32)(s16)val; \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \ cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
@ -408,14 +462,21 @@ void A_SWP(ARM* cpu)
u32 numD = cpu->DataCycles; u32 numD = cpu->DataCycles;
if (cpu->DataWrite32(base, rm)) if (cpu->DataWrite32(base, rm))
{ {
cpu->AddCycles_CDI();
// rd only gets updated if both read and write succeed // rd only gets updated if both read and write succeed
u32 rd = (cpu->CurInstr >> 12) & 0xF; u32 rd = (cpu->CurInstr >> 12) & 0xF;
if (rd != 15) cpu->R[rd] = ROR(val, 8*(base&0x3)); if (rd != 15)
else if (cpu->Num==1) cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1); // for some reason these jumps don't work on the arm 9? {
cpu->R[rd] = ROR(val, 8*(base&0x3));
cpu->SetCycles_L(rd, 1, cpu->ILT_Norm); // TODO: it adds an extra interlock cycle when doing a misaligned load from a non-itcm address
} }
else if (cpu->Num==1) // for some reason these jumps don't work on the arm 9?
cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1, cpu->ILT_Norm);
}
else cpu->AddCycles_CDI();
cpu->DataCycles += numD; cpu->DataCycles += numD;
} }
cpu->AddCycles_CDI(); else cpu->AddCycles_CDI();
} }
void A_SWPB(ARM* cpu) void A_SWPB(ARM* cpu)
@ -430,14 +491,21 @@ void A_SWPB(ARM* cpu)
u32 numD = cpu->DataCycles; u32 numD = cpu->DataCycles;
if (cpu->DataWrite8(base, rm)) if (cpu->DataWrite8(base, rm))
{ {
cpu->AddCycles_CDI();
// rd only gets updated if both read and write succeed // rd only gets updated if both read and write succeed
u32 rd = (cpu->CurInstr >> 12) & 0xF; u32 rd = (cpu->CurInstr >> 12) & 0xF;
if (rd != 15) cpu->R[rd] = val; if (rd != 15)
else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't work on the arm 9? {
cpu->R[rd] = val;
cpu->SetCycles_L(rd, 1, cpu->ILT_Norm); // TODO: it adds an extra interlock cycle when doing a load from a non-itcm address
} }
else if (cpu->Num==1)// for some reason these jumps don't work on the arm 9?
cpu->JumpTo(val & ~1);
}
else cpu->AddCycles_CDI();
cpu->DataCycles += numD; cpu->DataCycles += numD;
} }
cpu->AddCycles_CDI(); else cpu->AddCycles_CDI();
} }
@ -450,6 +518,7 @@ void A_LDM(ARM* cpu)
u32 oldbase = base; u32 oldbase = base;
u32 preinc = (cpu->CurInstr & (1<<24)); u32 preinc = (cpu->CurInstr & (1<<24));
bool first = true; bool first = true;
u8 lastreg = 0; // TODO: this doesn't support 0 reg LDMs (do those even work?)
if (!(cpu->CurInstr & (1<<23))) // decrement if (!(cpu->CurInstr & (1<<23))) // decrement
{ {
@ -486,6 +555,7 @@ void A_LDM(ARM* cpu)
first = false; first = false;
if (!preinc) base += 4; if (!preinc) base += 4;
lastreg = i;
} }
} }
@ -498,12 +568,18 @@ void A_LDM(ARM* cpu)
{ {
goto dataabort; goto dataabort;
} }
cpu->AddCycles_CDI();
if (!preinc) base += 4; if (!preinc) base += 4;
if (cpu->Num == 1) if (cpu->Num == 1)
pc &= ~0x1; pc &= ~0x1;
} }
else
{
cpu->AddCycles_CDI();
cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm); // TODO: THIS DOESN'T APPLY WHEN LOADING FROM ITCM
}
// switch back to previous regs // switch back to previous regs
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
@ -537,6 +613,8 @@ void A_LDM(ARM* cpu)
if (false) if (false)
{ {
dataabort: dataabort:
cpu->AddCycles_CDI();
// CHECKME: interlock shouldn't apply when it data aborts, right?
// switch back to original set of regs // switch back to original set of regs
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
@ -545,8 +623,6 @@ void A_LDM(ARM* cpu)
// restore original value of base in case the reg got written to // restore original value of base in case the reg got written to
cpu->R[baseid] = oldbase; cpu->R[baseid] = oldbase;
} }
cpu->AddCycles_CDI();
} }
void A_STM(ARM* cpu) void A_STM(ARM* cpu)