initial implementation of interlock cycles

This commit is contained in:
Jaklyy
2024-06-15 16:07:36 -04:00
parent aa1217af0a
commit a973c0bf5b
5 changed files with 179 additions and 125 deletions

View File

@ -160,14 +160,14 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry)
cpu->SetC(b & 0x80000000);
// Operand-2 setup for the "register shifted by immediate" forms.
// Declares b from the register selected by CurInstr bits 0-3 and s from the
// 5-bit field at CurInstr bits 7-11, then applies shiftop(b, s).
// NOTE(review): the two consecutive `u32 b` lines look like the before/after
// pair of a diff (direct cpu->R[] read vs. cpu->GetReg()); taken together they
// would redeclare b -- confirm which single line the real source carries.
#define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \
u32 b = cpu->R[cpu->CurInstr&0xF]; \
u32 b = cpu->GetReg(cpu->CurInstr&0xF); \
u32 s = (cpu->CurInstr>>7)&0x1F; \
shiftop(b, s);
// Operand-2 setup for the "register shifted by register" forms.
// Declares b from the register selected by CurInstr bits 0-3, adds 4 to it
// when that register index is 15, then applies shiftop with the low 8 bits of
// the register selected by CurInstr bits 8-11 as the shift amount.
// NOTE(review): the paired `u32 b` lines and the paired shiftop(...) lines look
// like before/after images of a diff (cpu->R[] vs. cpu->GetReg()); confirm
// which one of each pair the real source carries.
#define A_CALC_OP2_REG_SHIFT_REG(shiftop) \
u32 b = cpu->R[cpu->CurInstr&0xF]; \
u32 b = cpu->GetReg(cpu->CurInstr&0xF); \
if ((cpu->CurInstr&0xF)==15) b += 4; \
shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF));
shiftop(b, (cpu->GetReg((cpu->CurInstr>>8)&0xF) & 0xFF));
#define A_IMPLEMENT_ALU_OP(x,s) \
@ -313,7 +313,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \
#define A_AND(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a & b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -326,7 +326,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \
}
#define A_AND_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a & b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -344,7 +344,7 @@ A_IMPLEMENT_ALU_OP(AND,_S)
#define A_EOR(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a ^ b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -357,7 +357,7 @@ A_IMPLEMENT_ALU_OP(AND,_S)
}
#define A_EOR_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a ^ b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -375,7 +375,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
#define A_SUB(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a - b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -388,7 +388,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
}
#define A_SUB_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a - b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -408,7 +408,7 @@ A_IMPLEMENT_ALU_OP(SUB,)
#define A_RSB(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = b - a; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -421,7 +421,7 @@ A_IMPLEMENT_ALU_OP(SUB,)
}
#define A_RSB_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = b - a; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -441,7 +441,7 @@ A_IMPLEMENT_ALU_OP(RSB,)
#define A_ADD(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a + b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -454,7 +454,7 @@ A_IMPLEMENT_ALU_OP(RSB,)
}
#define A_ADD_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a + b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -474,7 +474,7 @@ A_IMPLEMENT_ALU_OP(ADD,)
#define A_ADC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -487,7 +487,7 @@ A_IMPLEMENT_ALU_OP(ADD,)
}
#define A_ADC_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res_tmp = a + b; \
u32 carry = (cpu->CPSR&0x20000000 ? 1:0); \
u32 res = res_tmp + carry; \
@ -509,7 +509,7 @@ A_IMPLEMENT_ALU_OP(ADC,)
#define A_SBC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -522,7 +522,7 @@ A_IMPLEMENT_ALU_OP(ADC,)
}
#define A_SBC_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res_tmp = a - b; \
u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \
u32 res = res_tmp - carry; \
@ -544,7 +544,7 @@ A_IMPLEMENT_ALU_OP(SBC,)
#define A_RSC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -557,7 +557,7 @@ A_IMPLEMENT_ALU_OP(SBC,)
}
#define A_RSC_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res_tmp = b - a; \
u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \
u32 res = res_tmp - carry; \
@ -579,7 +579,7 @@ A_IMPLEMENT_ALU_OP(RSC,)
#define A_TST(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a & b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -589,7 +589,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S)
#define A_TEQ(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a ^ b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -599,7 +599,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S)
#define A_CMP(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a - b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -611,7 +611,7 @@ A_IMPLEMENT_ALU_TEST(CMP,)
#define A_CMN(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a + b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -623,7 +623,7 @@ A_IMPLEMENT_ALU_TEST(CMN,)
#define A_ORR(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a | b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -636,7 +636,7 @@ A_IMPLEMENT_ALU_TEST(CMN,)
}
#define A_ORR_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a | b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -699,7 +699,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
#define A_BIC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a & ~b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -712,7 +712,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
}
#define A_BIC_S(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 res = a & ~b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -761,8 +761,8 @@ A_IMPLEMENT_ALU_OP(MVN,_S)
void A_MUL(ARM* cpu)
{
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF);
u32 res = rm * rs;
@ -791,9 +791,9 @@ void A_MUL(ARM* cpu)
void A_MLA(ARM* cpu)
{
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF);
u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF);
u32 res = (rm * rs) + rn;
@ -822,8 +822,8 @@ void A_MLA(ARM* cpu)
void A_UMULL(ARM* cpu)
{
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u64 res = (u64)rm * (u64)rs;
@ -848,17 +848,17 @@ void A_UMULL(ARM* cpu)
!res);
if (cpu->Num==1) cpu->SetC(0);
}
else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
}
void A_UMLAL(ARM* cpu)
{
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u64 res = (u64)rm * (u64)rs;
u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL);
u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL); // CHECKME: INTERLOCK?
res += rd;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
@ -887,8 +887,8 @@ void A_UMLAL(ARM* cpu)
void A_SMULL(ARM* cpu)
{
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
s64 res = (s64)(s32)rm * (s64)(s32)rs;
@ -913,17 +913,17 @@ void A_SMULL(ARM* cpu)
!res);
if (cpu->Num==1) cpu->SetC(0);
}
else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
}
void A_SMLAL(ARM* cpu)
{
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
s64 res = (s64)(s32)rm * (s64)(s32)rs;
s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL));
s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL)); // CHECKME: INTERLOCK?
res += rd;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
@ -947,16 +947,16 @@ void A_SMLAL(ARM* cpu)
!res);
if (cpu->Num==1) cpu->SetC(0);
}
else cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
}
void A_SMLAxy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1);
if (cpu->CurInstr & (1<<5)) rm >>= 16;
else rm &= 0xFFFF;
@ -978,9 +978,9 @@ void A_SMLAWy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1);
if (cpu->CurInstr & (1<<6)) rs >>= 16;
else rs &= 0xFFFF;
@ -1000,8 +1000,8 @@ void A_SMULxy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
if (cpu->CurInstr & (1<<5)) rm >>= 16;
else rm &= 0xFFFF;
@ -1019,8 +1019,8 @@ void A_SMULWy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
if (cpu->CurInstr & (1<<6)) rs >>= 16;
else rs &= 0xFFFF;
@ -1036,8 +1036,8 @@ void A_SMLALxy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 0);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 0); // yeah this one actually doesn't need two interlock cycles to interlock
if (cpu->CurInstr & (1<<5)) rm >>= 16;
else rm &= 0xFFFF;
@ -1053,7 +1053,7 @@ void A_SMLALxy(ARM* cpu)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
cpu->AddCycles_CI(1);
cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm);
cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm);
}
@ -1062,7 +1062,7 @@ void A_CLZ(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 val = cpu->R[cpu->CurInstr & 0xF];
u32 val = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 res = 0;
while ((val & 0xFF000000) == 0)
@ -1086,8 +1086,8 @@ void A_QADD(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1);
u32 res = rm + rn;
if (OverflowAdd(rm, rn))
@ -1105,8 +1105,8 @@ void A_QSUB(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1);
u32 res = rm - rn;
if (OverflowSub(rm, rn))
@ -1124,8 +1124,8 @@ void A_QDADD(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1);
if (OverflowAdd(rn, rn))
{
@ -1151,8 +1151,8 @@ void A_QDSUB(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1);
if (OverflowAdd(rn, rn))
{