revert the *entire* interlock implementation

It was too slow and not accurate enough.
We need to do a *lot* more research into the specifics of how interlocks work with all the various aspects of the CPU's timings before we can make a good implementation.
This commit is contained in:
Jaklyy 2024-07-11 20:06:56 -04:00
parent 1fdac1d489
commit 038ffa3a35
6 changed files with 290 additions and 534 deletions

View File

@ -190,8 +190,6 @@ void ARM::Reset()
BreakReq = false;
#endif
memset(InterlockTimestamp, 0, sizeof(InterlockTimestamp));
// zorp
JumpTo(ExceptionBase);
}
@ -695,7 +693,6 @@ void ARMv5::Execute()
NDS.ARM9Timestamp += Cycles;
Cycles = 0;
CyclesILed = 0;
}
if (Halted == 2)
@ -1262,7 +1259,7 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort)
void ARMv5::AddCycles_CD_STR()
{
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
s32 numD = DataCycles + CyclesILed;
s32 numD = DataCycles;
s32 early;
if (DataRegion == Mem9_ITCM)
@ -1287,7 +1284,7 @@ void ARMv5::AddCycles_CD_STR()
void ARMv5::AddCycles_CD_STM()
{
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
s32 numD = DataCycles + CyclesILed;
s32 numD = DataCycles;
s32 early;
if (DataRegion == Mem9_ITCM)
@ -1313,7 +1310,7 @@ void ARMv5::AddCycles_CDI_LDR()
{
// LDR cycles. ARM9 seems to skip the internal cycle here.
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
s32 numD = DataCycles + CyclesILed;
s32 numD = DataCycles;
// if a 32 bit bus, start 2 cycles early; else, start 4 cycles early
s32 early;
@ -1340,7 +1337,7 @@ void ARMv5::AddCycles_CDI_LDM()
{
// LDM cycles. ARM9 seems to skip the internal cycle here.
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
s32 numD = DataCycles + CyclesILed;
s32 numD = DataCycles;
// if a 32 bit bus, start 2 cycles early; else, start 4 cycles early
s32 early;
@ -1442,16 +1439,6 @@ void ARMv4::AddCycles_CD()
}
}
// Returns a mutable reference to NDS.ARM9Timestamp, so callers can both
// read the current timestamp and assign a new value through the result.
u64& ARMv5::Timestamp()
{
return NDS.ARM9Timestamp;
}
// Returns a mutable reference to NDS.ARM7Timestamp, so callers can both
// read the current timestamp and assign a new value through the result.
u64& ARMv4::Timestamp()
{
return NDS.ARM7Timestamp;
}
u8 ARMv5::BusRead8(u32 addr)
{
return NDS.ARM9Read8(addr);

View File

@ -30,8 +30,6 @@
#include "debug/GdbStub.h"
#endif
//#define INTERLOCK
namespace melonDS
{
inline u32 ROR(u32 x, u32 n)
@ -148,46 +146,6 @@ public:
virtual void AddCycles_CD_STR() = 0;
virtual void AddCycles_CD_STM() = 0;
/*
inline void AddCycles_L(const u32 delay, const u32 reg1)
{
if (InterlockTimestamp[reg1] > Timestamp() + delay);
Timestamp() = InterlockTimestamp[reg1];
}
inline void AddCycles_L(const u32 delay, const u32 reg1, const u32 reg2)
{
u64 cycles = std::max(InterlockTimestamp[reg1], InterlockTimestamp[reg2]);
if (cycles > Timestamp() + delay)
Timestamp() = cycles;
}
inline void AddCycles_L(const u32 delay, const u32 reg1, const u32 reg2, const u32 reg3)
{
u64 cycles = std::max(InterlockTimestamp[reg1], std::max(InterlockTimestamp[reg2], InterlockTimestamp[reg3]));
if (cycles > Timestamp() + delay)
Timestamp() = cycles;
}*/
#ifdef INTERLOCK
// fetch the value of a register while handling any interlock cycles
virtual inline u32 GetReg(const u32 reg, const u32 delay = 0) = 0;
// Must be called after all of an instruction's cycles are calculated!!!
virtual inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) = 0;
#else
// Fallback used when INTERLOCK is not defined: plain register read.
// Returns R[reg] unchanged. `delay` is accepted only so call sites have the
// same shape as the interlock-aware version; it is ignored here and no
// interlock cycles are accounted for.
inline u32 GetReg(const u32 reg, const u32 delay = 0)
{
return R[reg];
}
// Fallback used when INTERLOCK is not defined: intentionally a no-op.
// All three arguments are ignored, so call sites compile unchanged while
// recording nothing.
// (In the interlock-aware version this must be called after all of an
// instruction's cycles are calculated!!!)
inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) {}
#endif
virtual u64& Timestamp() = 0;
void CheckGdbIncoming();
u32 Num;
@ -224,15 +182,6 @@ public:
MemRegion CodeMem;
// Tag values passed as the `type` argument of SetCycles_L.
// NOTE(review): the visible implementations of SetCycles_L never store or
// read the tag (the only store is commented out), so these values appear to
// have no effect yet - confirm before relying on them.
enum InterlockType
{
ILT_Norm = 0, // passed by the non-multiply callers (e.g. QADD/QSUB, SMULxy/SMLAxy)
ILT_Mul = 1,  // passed by the non-S multiply callers (e.g. MUL, MLA, UMULL, SMULL)
};
u8 InterlockType[16];
u64 InterlockTimestamp[16];
#ifdef JIT_ENABLED
u32 FastBlockLookupStart, FastBlockLookupSize;
u64* FastBlockLookup;
@ -318,14 +267,14 @@ public:
{
// code only. always nonseq 32-bit for ARM9.
s32 numC = CodeCycles;
Cycles += std::max(numC, CyclesILed + 1);
Cycles += numC;
}
void AddCycles_CI(s32 numI) override
{
// code+internal
s32 numC = CodeCycles;
numI += 1 + CyclesILed;
numI += 1;
Cycles += std::max(numC, numI);
}
@ -334,25 +283,6 @@ public:
void AddCycles_CDI_SWP() override { AddCycles_CD_STR(); } // uses the same behavior as str
void AddCycles_CD_STR() override;
void AddCycles_CD_STM() override;
#ifdef INTERLOCK
// fetch the value of a register while handling any interlock cycles
//
// reg:   index into R[] / InterlockTimestamp[].
// delay: added to Timestamp() before comparing against the register's
//        recorded InterlockTimestamp.
// Returns R[reg] in all cases; the only side effect is on CyclesILed.
//
// If InterlockTimestamp[reg] is later than Timestamp() + delay, the
// difference is stored in CyclesILed.
// NOTE(review): CyclesILed is assigned, not max-combined, so when one
// instruction calls GetReg for several registers a later, smaller stall
// replaces an earlier, larger one - confirm whether that is intended.
// NOTE(review): the difference is computed in u64 and stored into CyclesILed
// (declared s32 elsewhere in this header) - presumably always small; verify.
inline u32 GetReg(const u32 reg, const u32 delay = 0) override
{
if (InterlockTimestamp[reg] > (Timestamp() + delay))
CyclesILed = InterlockTimestamp[reg] - (Timestamp() + delay);
return R[reg];
}
// Must be called after all of an instruction's cycles are calculated!!!
// (the value of Cycles is read here, so cycles added afterwards would not be
// included in the recorded timestamp)
//
// Sets InterlockTimestamp[reg] to Timestamp() + Cycles + cycles.
// `type` is currently unused: the store into InterlockType[] is commented out.
inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override
{
InterlockTimestamp[reg] = cycles + Timestamp() + Cycles;
//InterlockType[reg] = type;
}
#endif
u64& Timestamp() override;
void GetCodeMemRegion(u32 addr, MemRegion* region);
@ -417,8 +347,6 @@ public:
bool (*GetMemRegion)(u32 addr, bool write, MemRegion* region);
s32 CyclesILed;
#ifdef GDBSTUB_ENABLED
u32 ReadMem(u32 addr, int size) override;
void WriteMem(u32 addr, int size, u32 v) override;
@ -476,18 +404,6 @@ public:
void AddCycles_CD_STR() override { AddCycles_CD(); }
void AddCycles_CD_STM() override { AddCycles_CD(); }
#ifdef INTERLOCK
// Register fetch for this CPU class: no interlock handling is performed.
// Returns R[reg] unchanged; `delay` is ignored.
inline u32 GetReg(const u32 reg, const u32 delay = 0) override
{
return R[reg];
}
// No-op override for this CPU class: nothing is recorded and all three
// arguments are ignored.
inline void SetCycles_L(const u32 reg, const u32 cycles, const u32 type) override{}
#endif
u64& Timestamp() override;
protected:
u8 BusRead8(u32 addr) override;
u16 BusRead16(u32 addr) override;

View File

@ -163,7 +163,7 @@ void A_MSR_REG(ARM* cpu)
if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00;
u32 val = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 val = cpu->R[cpu->CurInstr & 0xF];
// bit4 is forced to 1
val |= 0x00000010;
@ -216,7 +216,7 @@ void A_MCR(ARM* cpu)
u32 cn = (cpu->CurInstr >> 16) & 0xF;
u32 cm = cpu->CurInstr & 0xF;
u32 cpinfo = (cpu->CurInstr >> 5) & 0x7;
u32 val = cpu->GetReg((cpu->CurInstr>>12)&0xF);
u32 val = cpu->R[(cpu->CurInstr>>12)&0xF];
if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4;
if (cpu->Num==0 && cp==15)

View File

@ -160,14 +160,14 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry)
cpu->SetC(b & 0x80000000);
#define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \
u32 b = cpu->GetReg(cpu->CurInstr&0xF); \
u32 b = cpu->R[cpu->CurInstr&0xF]; \
u32 s = (cpu->CurInstr>>7)&0x1F; \
shiftop(b, s);
#define A_CALC_OP2_REG_SHIFT_REG(shiftop) \
u32 b = cpu->GetReg(cpu->CurInstr&0xF); \
u32 b = cpu->R[cpu->CurInstr&0xF]; \
if ((cpu->CurInstr&0xF)==15) b += 4; \
shiftop(b, (cpu->GetReg((cpu->CurInstr>>8)&0xF) & 0xFF));
shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF));
#define A_IMPLEMENT_ALU_OP(x,s) \
@ -313,7 +313,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \
#define A_AND(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -326,7 +326,7 @@ void A_##x##_REG_ROR_REG(ARM* cpu) \
}
#define A_AND_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -344,7 +344,7 @@ A_IMPLEMENT_ALU_OP(AND,_S)
#define A_EOR(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a ^ b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -357,7 +357,7 @@ A_IMPLEMENT_ALU_OP(AND,_S)
}
#define A_EOR_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a ^ b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -375,7 +375,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
#define A_SUB(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -388,7 +388,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
}
#define A_SUB_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -408,7 +408,7 @@ A_IMPLEMENT_ALU_OP(SUB,)
#define A_RSB(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = b - a; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -421,7 +421,7 @@ A_IMPLEMENT_ALU_OP(SUB,)
}
#define A_RSB_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = b - a; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -441,7 +441,7 @@ A_IMPLEMENT_ALU_OP(RSB,)
#define A_ADD(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -454,7 +454,7 @@ A_IMPLEMENT_ALU_OP(RSB,)
}
#define A_ADD_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -474,7 +474,7 @@ A_IMPLEMENT_ALU_OP(ADD,)
#define A_ADC(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -487,7 +487,7 @@ A_IMPLEMENT_ALU_OP(ADD,)
}
#define A_ADC_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res_tmp = a + b; \
u32 carry = (cpu->CPSR&0x20000000 ? 1:0); \
u32 res = res_tmp + carry; \
@ -509,7 +509,7 @@ A_IMPLEMENT_ALU_OP(ADC,)
#define A_SBC(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -522,7 +522,7 @@ A_IMPLEMENT_ALU_OP(ADC,)
}
#define A_SBC_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res_tmp = a - b; \
u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \
u32 res = res_tmp - carry; \
@ -544,7 +544,7 @@ A_IMPLEMENT_ALU_OP(SBC,)
#define A_RSC(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -557,7 +557,7 @@ A_IMPLEMENT_ALU_OP(SBC,)
}
#define A_RSC_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res_tmp = b - a; \
u32 carry = (cpu->CPSR&0x20000000 ? 0:1); \
u32 res = res_tmp - carry; \
@ -579,7 +579,7 @@ A_IMPLEMENT_ALU_OP(RSC,)
#define A_TST(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -589,7 +589,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S)
#define A_TEQ(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a ^ b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -599,7 +599,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S)
#define A_CMP(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -611,7 +611,7 @@ A_IMPLEMENT_ALU_TEST(CMP,)
#define A_CMN(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
@ -623,7 +623,7 @@ A_IMPLEMENT_ALU_TEST(CMN,)
#define A_ORR(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a | b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -636,7 +636,7 @@ A_IMPLEMENT_ALU_TEST(CMN,)
}
#define A_ORR_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a | b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -699,7 +699,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
#define A_BIC(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & ~b; \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
@ -712,7 +712,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
}
#define A_BIC_S(c) \
u32 a = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & ~b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
@ -761,12 +761,18 @@ A_IMPLEMENT_ALU_OP(MVN,_S)
void A_MUL(ARM* cpu)
{
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 res = rm * rs;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
@ -780,55 +786,53 @@ void A_MUL(ARM* cpu)
}
cpu->AddCycles_CI(cycles);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num==1) cpu->SetC(0);
}
else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
}
void A_MLA(ARM* cpu)
{
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF);
u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
u32 res = (rm * rs) + rn;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
else
{
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
}
cpu->AddCycles_CI(cycles);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num==1) cpu->SetC(0);
}
else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
else
{
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
}
cpu->AddCycles_CI(cycles);
}
void A_UMULL(ARM* cpu)
{
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u64 res = (u64)rm * (u64)rs;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
@ -842,27 +846,26 @@ void A_UMULL(ARM* cpu)
}
cpu->AddCycles_CI(cycles);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
}
void A_UMLAL(ARM* cpu)
{
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u64 res = (u64)rm * (u64)rs;
u64 rd = (u64)cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1) | ((u64)cpu->GetReg((cpu->CurInstr >> 16) & 0xF) << 32ULL);
u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL);
res += rd;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
@ -876,24 +879,23 @@ void A_UMLAL(ARM* cpu)
}
cpu->AddCycles_CI(cycles);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
}
void A_SMULL(ARM* cpu)
{
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
s64 res = (s64)(s32)rm * (s64)(s32)rs;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
@ -907,27 +909,26 @@ void A_SMULL(ARM* cpu)
}
cpu->AddCycles_CI(cycles);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
}
void A_SMLAL(ARM* cpu)
{
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
s64 res = (s64)(s32)rm * (s64)(s32)rs;
s64 rd = (s64)((u64)cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1) | ((u64)cpu->GetReg((cpu->CurInstr >> 16) & 0xF) << 32ULL));
s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL));
res += rd;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
@ -939,24 +940,17 @@ void A_SMLAL(ARM* cpu)
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
}
cpu->AddCycles_CI(cycles);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
else cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Mul); // interlock cycles do not occur with S variants of multiply instructions
}
void A_SMLAxy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
if (cpu->CurInstr & (1<<5)) rm >>= 16;
else rm &= 0xFFFF;
@ -970,17 +964,16 @@ void A_SMLAxy(ARM* cpu)
if (OverflowAdd(res_mul, rn))
cpu->CPSR |= 0x08000000;
cpu->AddCycles_C();
cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm);
cpu->AddCycles_C(); // TODO: interlock??
}
void A_SMLAWy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 12) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
if (cpu->CurInstr & (1<<6)) rs >>= 16;
else rs &= 0xFFFF;
@ -992,16 +985,15 @@ void A_SMLAWy(ARM* cpu)
if (OverflowAdd(res_mul, rn))
cpu->CPSR |= 0x08000000;
cpu->AddCycles_C();
cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm);
cpu->AddCycles_C(); // TODO: interlock??
}
void A_SMULxy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
if (cpu->CurInstr & (1<<5)) rm >>= 16;
else rm &= 0xFFFF;
@ -1011,16 +1003,15 @@ void A_SMULxy(ARM* cpu)
u32 res = ((s16)rm * (s16)rs);
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->AddCycles_C();
cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm);
cpu->AddCycles_C(); // TODO: interlock??
}
void A_SMULWy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
if (cpu->CurInstr & (1<<6)) rs >>= 16;
else rs &= 0xFFFF;
@ -1028,16 +1019,15 @@ void A_SMULWy(ARM* cpu)
u32 res = ((s64)(s32)rm * (s16)rs) >> 16;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->AddCycles_C();
cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm);
cpu->AddCycles_C(); // TODO: interlock??
}
void A_SMLALxy(ARM* cpu)
{
if (cpu->Num != 0) return;
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 0);
u32 rs = cpu->GetReg((cpu->CurInstr >> 8) & 0xF, 0); // yeah this one actually doesn't need two interlock cycles to interlock
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
if (cpu->CurInstr & (1<<5)) rm >>= 16;
else rm &= 0xFFFF;
@ -1052,8 +1042,7 @@ void A_SMLALxy(ARM* cpu)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
cpu->AddCycles_CI(1);
cpu->SetCycles_L((cpu->CurInstr >> 16) & 0xF, 1, cpu->ILT_Norm);
cpu->AddCycles_CI(1); // TODO: interlock??
}
@ -1062,7 +1051,7 @@ void A_CLZ(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 val = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 val = cpu->R[cpu->CurInstr & 0xF];
u32 res = 0;
while ((val & 0xFF000000) == 0)
@ -1087,8 +1076,8 @@ void A_QADD(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 res = rm + rn;
if (OverflowAdd(rm, rn))
@ -1098,16 +1087,15 @@ void A_QADD(ARM* cpu)
}
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C();
cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm);
cpu->AddCycles_C(); // TODO: interlock??
}
void A_QSUB(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 res = rm - rn;
if (OverflowSub(rm, rn))
@ -1117,16 +1105,15 @@ void A_QSUB(ARM* cpu)
}
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C();
cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm);
cpu->AddCycles_C(); // TODO: interlock??
}
void A_QDADD(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
if (OverflowAdd(rn, rn))
{
@ -1144,16 +1131,15 @@ void A_QDADD(ARM* cpu)
}
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C();
cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm);
cpu->AddCycles_C(); // TODO: interlock??
}
void A_QDSUB(ARM* cpu)
{
if (cpu->Num != 0) return A_UNK(cpu);
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 rn = cpu->GetReg((cpu->CurInstr >> 16) & 0xF, 1);
u32 rm = cpu->R[cpu->CurInstr & 0xF];
u32 rn = cpu->R[(cpu->CurInstr >> 16) & 0xF];
if (OverflowAdd(rn, rn))
{
@ -1171,8 +1157,7 @@ void A_QDSUB(ARM* cpu)
}
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C();
cpu->SetCycles_L((cpu->CurInstr >> 12) & 0xF, 1, cpu->ILT_Norm);
cpu->AddCycles_C(); // TODO: interlock??
}
@ -1183,7 +1168,7 @@ void A_QDSUB(ARM* cpu)
void T_LSL_IMM(ARM* cpu)
{
u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 s = (cpu->CurInstr >> 6) & 0x1F;
LSL_IMM_S(op, s);
cpu->R[cpu->CurInstr & 0x7] = op;
@ -1194,7 +1179,7 @@ void T_LSL_IMM(ARM* cpu)
void T_LSR_IMM(ARM* cpu)
{
u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 s = (cpu->CurInstr >> 6) & 0x1F;
LSR_IMM_S(op, s);
cpu->R[cpu->CurInstr & 0x7] = op;
@ -1205,7 +1190,7 @@ void T_LSR_IMM(ARM* cpu)
void T_ASR_IMM(ARM* cpu)
{
u32 op = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 op = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 s = (cpu->CurInstr >> 6) & 0x1F;
ASR_IMM_S(op, s);
cpu->R[cpu->CurInstr & 0x7] = op;
@ -1216,8 +1201,8 @@ void T_ASR_IMM(ARM* cpu)
void T_ADD_REG_(ARM* cpu)
{
u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7];
u32 res = a + b;
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
@ -1229,8 +1214,8 @@ void T_ADD_REG_(ARM* cpu)
void T_SUB_REG_(ARM* cpu)
{
u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 6) & 0x7];
u32 res = a - b;
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
@ -1242,7 +1227,7 @@ void T_SUB_REG_(ARM* cpu)
void T_ADD_IMM_(ARM* cpu)
{
u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 b = (cpu->CurInstr >> 6) & 0x7;
u32 res = a + b;
cpu->R[cpu->CurInstr & 0x7] = res;
@ -1255,7 +1240,7 @@ void T_ADD_IMM_(ARM* cpu)
void T_SUB_IMM_(ARM* cpu)
{
u32 a = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 b = (cpu->CurInstr >> 6) & 0x7;
u32 res = a - b;
cpu->R[cpu->CurInstr & 0x7] = res;
@ -1275,9 +1260,9 @@ void T_MOV_IMM(ARM* cpu)
cpu->AddCycles_C();
}
void T_CMP_IMM(ARM* cpu)
void T_CMP_IMM(ARM* cpu)
{
u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7);
u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7];
u32 b = cpu->CurInstr & 0xFF;
u32 res = a - b;
cpu->SetNZCV(res & 0x80000000,
@ -1289,7 +1274,7 @@ void T_CMP_IMM(ARM* cpu)
void T_ADD_IMM(ARM* cpu)
{
u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7);
u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7];
u32 b = cpu->CurInstr & 0xFF;
u32 res = a + b;
cpu->R[(cpu->CurInstr >> 8) & 0x7] = res;
@ -1302,7 +1287,7 @@ void T_ADD_IMM(ARM* cpu)
void T_SUB_IMM(ARM* cpu)
{
u32 a = cpu->GetReg((cpu->CurInstr >> 8) & 0x7);
u32 a = cpu->R[(cpu->CurInstr >> 8) & 0x7];
u32 b = cpu->CurInstr & 0xFF;
u32 res = a - b;
cpu->R[(cpu->CurInstr >> 8) & 0x7] = res;
@ -1316,8 +1301,8 @@ void T_SUB_IMM(ARM* cpu)
void T_AND_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = a & b;
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
@ -1327,8 +1312,8 @@ void T_AND_REG(ARM* cpu)
void T_EOR_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = a ^ b;
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
@ -1338,8 +1323,8 @@ void T_EOR_REG(ARM* cpu)
void T_LSL_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF;
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF;
LSL_REG_S(a, b);
cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000,
@ -1349,8 +1334,8 @@ void T_LSL_REG(ARM* cpu)
void T_LSR_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF;
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF;
LSR_REG_S(a, b);
cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000,
@ -1360,8 +1345,8 @@ void T_LSR_REG(ARM* cpu)
void T_ASR_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF;
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF;
ASR_REG_S(a, b);
cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000,
@ -1371,8 +1356,8 @@ void T_ASR_REG(ARM* cpu)
void T_ADC_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res_tmp = a + b;
u32 carry = (cpu->CPSR&0x20000000 ? 1:0);
u32 res = res_tmp + carry;
@ -1386,8 +1371,8 @@ void T_ADC_REG(ARM* cpu)
void T_SBC_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res_tmp = a - b;
u32 carry = (cpu->CPSR&0x20000000 ? 0:1);
u32 res = res_tmp - carry;
@ -1401,8 +1386,8 @@ void T_SBC_REG(ARM* cpu)
void T_ROR_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7, 1);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) & 0xFF;
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7] & 0xFF;
ROR_REG_S(a, b);
cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000,
@ -1412,8 +1397,8 @@ void T_ROR_REG(ARM* cpu)
void T_TST_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = a & b;
cpu->SetNZ(res & 0x80000000,
!res);
@ -1422,7 +1407,7 @@ void T_TST_REG(ARM* cpu)
void T_NEG_REG(ARM* cpu)
{
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = -b;
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZCV(res & 0x80000000,
@ -1434,8 +1419,8 @@ void T_NEG_REG(ARM* cpu)
void T_CMP_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = a - b;
cpu->SetNZCV(res & 0x80000000,
!res,
@ -1446,8 +1431,8 @@ void T_CMP_REG(ARM* cpu)
void T_CMN_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = a + b;
cpu->SetNZCV(res & 0x80000000,
!res,
@ -1458,8 +1443,8 @@ void T_CMN_REG(ARM* cpu)
void T_ORR_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = a | b;
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
@ -1469,8 +1454,8 @@ void T_ORR_REG(ARM* cpu)
void T_MUL_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = a * b;
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
@ -1494,8 +1479,8 @@ void T_MUL_REG(ARM* cpu)
void T_BIC_REG(ARM* cpu)
{
u32 a = cpu->GetReg(cpu->CurInstr & 0x7);
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 a = cpu->R[cpu->CurInstr & 0x7];
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = a & ~b;
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
@ -1505,7 +1490,7 @@ void T_BIC_REG(ARM* cpu)
void T_MVN_REG(ARM* cpu)
{
u32 b = cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
u32 b = cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 res = ~b;
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
@ -1522,8 +1507,8 @@ void T_ADD_HIREG(ARM* cpu)
u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8);
u32 rs = (cpu->CurInstr >> 3) & 0xF;
u32 a = cpu->GetReg(rd);
u32 b = cpu->GetReg(rs);
u32 a = cpu->R[rd];
u32 b = cpu->R[rs];
cpu->AddCycles_C();
@ -1542,8 +1527,8 @@ void T_CMP_HIREG(ARM* cpu)
u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8);
u32 rs = (cpu->CurInstr >> 3) & 0xF;
u32 a = cpu->GetReg(rd);
u32 b = cpu->GetReg(rs);
u32 a = cpu->R[rd];
u32 b = cpu->R[rs];
u32 res = a - b;
cpu->SetNZCV(res & 0x80000000,
@ -1562,11 +1547,11 @@ void T_MOV_HIREG(ARM* cpu)
if (rd == 15)
{
cpu->JumpTo(cpu->GetReg(rs) | 1);
cpu->JumpTo(cpu->R[rs] | 1);
}
else
{
cpu->R[rd] = cpu->GetReg(rs);
cpu->R[rd] = cpu->R[rs];
}
// nocash-style debugging hook
@ -1583,7 +1568,7 @@ void T_MOV_HIREG(ARM* cpu)
}
void T_ADD_PCREL(ARM* cpu) // checkme: pc shouldn't be able to interlock?
void T_ADD_PCREL(ARM* cpu)
{
u32 val = cpu->R[15] & ~2;
val += ((cpu->CurInstr & 0xFF) << 2);
@ -1591,7 +1576,7 @@ void T_ADD_PCREL(ARM* cpu) // checkme: pc shouldn't be able to interlock?
cpu->AddCycles_C();
}
void T_ADD_SPREL(ARM* cpu) // checkme: sp shouldn't be able to interlock in thumb?
void T_ADD_SPREL(ARM* cpu)
{
u32 val = cpu->R[13];
val += ((cpu->CurInstr & 0xFF) << 2);
@ -1599,7 +1584,7 @@ void T_ADD_SPREL(ARM* cpu) // checkme: sp shouldn't be able to interlock in thum
cpu->AddCycles_C();
}
void T_ADD_SP(ARM* cpu) // checkme: sp shouldn't be able to interlock in thumb?
void T_ADD_SP(ARM* cpu)
{
u32 val = cpu->R[13];
if (cpu->CurInstr & (1<<7))

View File

@ -46,15 +46,15 @@ void A_BLX_IMM(ARM* cpu)
cpu->JumpTo(cpu->R[15] + offset + 1);
}
// A_BX: jumps to the address held in the register selected by bits 0-3 of the opcode.
// Diff view: the signature with the "verify interlock" note and the GetReg() read are the
// removed interlock lines; the plain signature and the direct cpu->R[] read are the restored ones.
void A_BX(ARM* cpu) // verify interlock
void A_BX(ARM* cpu)
{
cpu->JumpTo(cpu->GetReg(cpu->CurInstr & 0xF));
cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]);
}
// A_BLX_REG: branch-with-link to the address in the register selected by bits 0-3.
// The return address (R15 - 4) is captured before the jump and written to R14 afterwards,
// so the jump target is read before R14 is overwritten.
// Diff view: the GetReg() line / "verify interlock" signature are the removed interlock lines;
// the cpu->R[] line / plain signature are the restored ones.
void A_BLX_REG(ARM* cpu) // verify interlock
void A_BLX_REG(ARM* cpu)
{
u32 lr = cpu->R[15] - 4;
cpu->JumpTo(cpu->GetReg(cpu->CurInstr & 0xF));
cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]);
cpu->R[14] = lr;
}
@ -71,12 +71,12 @@ void T_BCOND(ARM* cpu)
cpu->AddCycles_C();
}
// T_BX: Thumb-encoded branch to the address held in the register selected by bits 3-6
// of the opcode (a 4-bit field, so high registers are reachable).
// Diff view: the GetReg() line / "verify interlock" signature are the removed interlock lines;
// the cpu->R[] line / plain signature are the restored ones.
void T_BX(ARM* cpu) // verify interlock
void T_BX(ARM* cpu)
{
cpu->JumpTo(cpu->GetReg((cpu->CurInstr >> 3) & 0xF));
cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]);
}
void T_BLX_REG(ARM* cpu) // verify interlock
void T_BLX_REG(ARM* cpu)
{
if (cpu->Num==1)
{
@ -85,7 +85,7 @@ void T_BLX_REG(ARM* cpu) // verify interlock
}
u32 lr = cpu->R[15] - 1;
cpu->JumpTo(cpu->GetReg((cpu->CurInstr >> 3) & 0xF));
cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]);
cpu->R[14] = lr;
}

View File

@ -53,7 +53,7 @@ namespace melonDS::ARMInterpreter
if (!(cpu->CurInstr & (1<<23))) offset = -offset;
// A_WB_CALC_OFFSET_REG(shiftop): declares `offset` from the register in bits 0-3, applies
// `shiftop` to it with the 5-bit shift amount taken from bits 7-11, and negates the result
// when bit 23 of the opcode is clear.
// Diff view: the GetReg() line is the removed interlock form, the cpu->R[] line the restored one.
#define A_WB_CALC_OFFSET_REG(shiftop) \
u32 offset = cpu->GetReg(cpu->CurInstr & 0xF); \
u32 offset = cpu->R[cpu->CurInstr & 0xF]; \
u32 shift = ((cpu->CurInstr>>7)&0x1F); \
shiftop(offset, shift); \
if (!(cpu->CurInstr & (1<<23))) offset = -offset;
@ -61,8 +61,8 @@ namespace melonDS::ARMInterpreter
#define A_STR \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \
if (((cpu->CurInstr>>12) & 0xF) == 0xF) \
storeval += 4; \
bool dataabort = !cpu->DataWrite32(offset, storeval); \
@ -72,8 +72,8 @@ namespace melonDS::ARMInterpreter
// TODO: user mode (bit21)
#define A_STR_POST \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \
if (((cpu->CurInstr>>12) & 0xF) == 0xF) \
storeval += 4; \
bool dataabort = !cpu->DataWrite32(addr, storeval); \
@ -82,8 +82,8 @@ namespace melonDS::ARMInterpreter
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_STRB \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \
if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \
bool dataabort = !cpu->DataWrite8(offset, storeval); \
cpu->AddCycles_CD_STR(); \
@ -92,8 +92,8 @@ namespace melonDS::ARMInterpreter
// TODO: user mode (bit21)
#define A_STRB_POST \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \
if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \
bool dataabort = !cpu->DataWrite8(addr, storeval); \
cpu->AddCycles_CD_STR(); \
@ -101,7 +101,7 @@ namespace melonDS::ARMInterpreter
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_LDR \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead32(offset, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
@ -115,12 +115,11 @@ namespace melonDS::ARMInterpreter
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (offset & 3) ? 2 : 1, cpu->ILT_Norm); \
}
// TODO: user mode
#define A_LDR_POST \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead32(addr, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
@ -134,37 +133,26 @@ namespace melonDS::ARMInterpreter
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, (addr & 3) ? 2 : 1, cpu->ILT_Norm); \
}
// A_LDRB: byte load with pre-computed offset. Expects `offset` to already be in scope;
// adds the base register (bits 16-19) to it, reads one byte, charges AddCycles_CDI_LDR and
// bails out on a data abort. When bit 21 is set the final address is written back to the base.
// A destination (bits 12-15) of 15 goes through JumpTo8_16Bit; any other register receives val.
// Diff view: the GetReg() read and the braced else-branch calling SetCycles_L() are the removed
// interlock lines; the cpu->R[] read and the two one-line if/else statements are the restored ones.
#define A_LDRB \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
}
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val;
// TODO: user mode
// A_LDRB_POST: post-indexed byte load. Reads one byte at the unmodified base register
// (bits 16-19), charges AddCycles_CDI_LDR, returns on a data abort, then adds `offset`
// (expected to be in scope) to the base. A destination (bits 12-15) of 15 goes through
// JumpTo8_16Bit; any other register receives val.
// Diff view: the GetReg() read and the braced else-branch calling SetCycles_L() are the removed
// interlock lines; the cpu->R[] read and the two one-line if/else statements are the restored ones.
#define A_LDRB_POST \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
}
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val;
@ -242,14 +230,14 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
if (!(cpu->CurInstr & (1<<23))) offset = -offset;
// A_HD_CALC_OFFSET_REG: declares `offset` from the register in bits 0-3 (no shift applied)
// and negates it when bit 23 of the opcode is clear.
// Diff view: the GetReg() line is the removed interlock form, the cpu->R[] line the restored one.
#define A_HD_CALC_OFFSET_REG \
u32 offset = cpu->GetReg(cpu->CurInstr & 0xF); \
u32 offset = cpu->R[cpu->CurInstr & 0xF]; \
if (!(cpu->CurInstr & (1<<23))) offset = -offset;
#define A_STRH \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \
if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \
bool dataabort = !cpu->DataWrite16(offset, storeval); \
cpu->AddCycles_CD_STR(); \
@ -257,8 +245,8 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
#define A_STRH_POST \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 storeval = cpu->GetReg((cpu->CurInstr>>12) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 storeval = cpu->R[(cpu->CurInstr>>12) & 0xF]; \
if (((cpu->CurInstr>>12) & 0xF) == 15) storeval+=4; \
bool dataabort = !cpu->DataWrite16(addr, storeval); \
cpu->AddCycles_CD_STR(); \
@ -269,47 +257,35 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
// A_LDRD: doubleword load with pre-computed `offset` (expected to be in scope).
// Does nothing unless cpu->Num == 0; an odd destination index r (bits 12-15) is routed to A_UNK.
// The first word is read straight into R[r]; the second comes from offset+4 via DataRead32S.
// With r == 14 the second word is used as a jump target (bit 0 masked off when CP15Control
// bit 15 is set) and opcode bit 22 is forwarded as JumpTo's second argument; otherwise it
// lands in R[r+1]. Bit 21 selects writeback of the final address to the base register.
// Diff view: lines using GetReg(), the `dataabort` flag, AddCycles_CDI_LDR() on the first read
// and SetCycles_L() belong to the removed interlock version. The restored version reads
// cpu->R[] directly, charges AddCycles_CDI() on either failed read, and performs the
// register write / jump before AddCycles_CDI_LDM().
#define A_LDRD \
if (cpu->Num != 0) return; \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
if (!cpu->DataRead32 (offset, &cpu->R[r])) {cpu->AddCycles_CDI_LDR(); return;} \
u32 val; bool dataabort = !cpu->DataRead32S(offset+4, &val); \
if (!cpu->DataRead32 (offset , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \
u32 val; if (!cpu->DataRead32S(offset+4, &val)) {cpu->AddCycles_CDI(); return;} \
if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else cpu->R[r+1] = val; \
cpu->AddCycles_CDI_LDM(); \
if (dataabort) return; \
if (r == 14) \
cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else \
{ \
cpu->R[r+1] = val; \
cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \
} \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
// A_LDRD_POST: post-indexed doubleword load. Does nothing unless cpu->Num == 0; an odd
// destination index r (bits 12-15) is routed to A_UNK. Reads the first word at the unmodified
// base into R[r] and the second from addr+4 via DataRead32S. With r == 14 the second word is
// used as a jump target (bit 0 masked off when CP15Control bit 15 is set), otherwise it lands
// in R[r+1]. `offset` (expected to be in scope) is added to the base register at the end.
// Diff view: lines using GetReg(), the `dataabort` flag, AddCycles_CDI_LDR() on the first read
// and SetCycles_L() belong to the removed interlock version. The restored version reads
// cpu->R[] directly, charges AddCycles_CDI() on either failed read, and performs the
// register write / jump before AddCycles_CDI_LDM().
#define A_LDRD_POST \
if (cpu->Num != 0) return; \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
if (!cpu->DataRead32 (addr, &cpu->R[r])) {cpu->AddCycles_CDI_LDR(); return;} \
u32 val; bool dataabort = !cpu->DataRead32S(addr+4, &val); \
if (!cpu->DataRead32 (addr , &cpu->R[r ])) {cpu->AddCycles_CDI(); return;} \
u32 val; if (!cpu->DataRead32S(addr+4, &val)) {cpu->AddCycles_CDI(); return;} \
if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else cpu->R[r+1] = val; \
cpu->AddCycles_CDI_LDM(); \
if (dataabort) return; \
if (r == 14) \
cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else \
{ \
cpu->R[r+1] = val; \
cpu->SetCycles_L(r+1, 1, cpu->ILT_Norm); \
} \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_STRD \
if (cpu->Num != 0) return; \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
bool dataabort = !cpu->DataWrite32(offset, cpu->GetReg(r)); /* yes, this data abort behavior is on purpose */ \
u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \
bool dataabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \
u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \
dataabort |= !cpu->DataWrite32S (offset+4, storeval, dataabort); /* no, i dont understand it either */ \
cpu->AddCycles_CD_STM(); \
if (dataabort) return; \
@ -317,102 +293,72 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
// A_STRD_POST: post-indexed doubleword store. Does nothing unless cpu->Num == 0; an odd
// source index r (bits 12-15) is routed to A_UNK. Writes R[r] at the unmodified base, then
// R[r+1] (plus 4 when r == 14) at addr+4 via DataWrite32S, handing it the first write's abort
// flag. Charges AddCycles_CD_STM, returns if either write aborted, and finally adds `offset`
// (expected to be in scope) to the base register.
// Diff view: the lines reading through GetReg() (including the two-argument form that passes
// cpu->DataCycles) are the removed interlock lines; the cpu->R[] lines are the restored ones.
#define A_STRD_POST \
if (cpu->Num != 0) return; \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
bool dataabort = !cpu->DataWrite32(addr, cpu->GetReg(r)); \
u32 storeval = cpu->GetReg(r+1, cpu->DataCycles); if (r == 14) storeval+=4; \
bool dataabort = !cpu->DataWrite32(addr, cpu->R[r]); \
u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \
dataabort |= !cpu->DataWrite32S (addr+4, storeval, dataabort); \
cpu->AddCycles_CD_STM(); \
if (dataabort) return; \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
// A_LDRH: halfword load with pre-computed `offset` (expected to be in scope). Adds the base
// register (bits 16-19), reads 16 bits, charges AddCycles_CDI_LDR and returns on a data abort.
// A destination (bits 12-15) of 15 goes through JumpTo8_16Bit; any other register receives val.
// Bit 21 selects writeback of the final address to the base, performed after the destination
// is updated.
// Diff view: the GetReg() read and the braced else-branch calling SetCycles_L() are the removed
// interlock lines; the cpu->R[] read and the two one-line if/else statements are the restored ones.
#define A_LDRH \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
// A_LDRH_POST: post-indexed halfword load. Reads 16 bits at the unmodified base register
// (bits 16-19), charges AddCycles_CDI_LDR and returns on a data abort. A destination
// (bits 12-15) of 15 goes through JumpTo8_16Bit; any other register receives val. `offset`
// (expected to be in scope) is added to the base register last.
// Diff view: the GetReg() read and the braced else-branch calling SetCycles_L() are the removed
// interlock lines; the cpu->R[] read and the two one-line if/else statements are the restored ones.
#define A_LDRH_POST \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
// A_LDRSB: sign-extending byte load with pre-computed `offset` (expected to be in scope).
// Adds the base register (bits 16-19), reads 8 bits, charges AddCycles_CDI_LDR, returns on a
// data abort, then sign-extends val from 8 to 32 bits. A destination (bits 12-15) of 15 goes
// through JumpTo8_16Bit; any other register receives val. Bit 21 selects base writeback.
// Diff view: the GetReg() read and the braced else-branch calling SetCycles_L() are the removed
// interlock lines; the cpu->R[] read and the two one-line if/else statements are the restored ones.
#define A_LDRSB \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead8(offset, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
val = (s32)(s8)val; \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
// A_LDRSB_POST: post-indexed sign-extending byte load. Reads 8 bits at the unmodified base
// register (bits 16-19), charges AddCycles_CDI_LDR, returns on a data abort, then sign-extends
// val from 8 to 32 bits. A destination (bits 12-15) of 15 goes through JumpTo8_16Bit; any other
// register receives val. `offset` (expected to be in scope) is added to the base register last.
// Diff view: the GetReg() read and the braced else-branch calling SetCycles_L() are the removed
// interlock lines; the cpu->R[] read and the two one-line if/else statements are the restored ones.
#define A_LDRSB_POST \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead8(addr, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
val = (s32)(s8)val; \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
// A_LDRSH: sign-extending halfword load with pre-computed `offset` (expected to be in scope).
// Adds the base register (bits 16-19), reads 16 bits, charges AddCycles_CDI_LDR, returns on a
// data abort, then sign-extends val from 16 to 32 bits. A destination (bits 12-15) of 15 goes
// through JumpTo8_16Bit; any other register receives val. Bit 21 selects base writeback.
// Diff view: the GetReg() read and the braced else-branch calling SetCycles_L() are the removed
// interlock lines; the cpu->R[] read and the two one-line if/else statements are the restored ones.
#define A_LDRSH \
offset += cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead16(offset, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
val = (s32)(s16)val; \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
// A_LDRSH_POST: post-indexed sign-extending halfword load. Reads 16 bits at the unmodified
// base register (bits 16-19), charges AddCycles_CDI_LDR, returns on a data abort, then
// sign-extends val from 16 to 32 bits. A destination (bits 12-15) of 15 goes through
// JumpTo8_16Bit; any other register receives val. `offset` (expected to be in scope) is added
// to the base register last.
// Diff view: the GetReg() read and the braced else-branch calling SetCycles_L() are the removed
// interlock lines; the cpu->R[] read and the two one-line if/else statements are the restored ones.
#define A_LDRSH_POST \
u32 addr = cpu->GetReg((cpu->CurInstr>>16) & 0xF); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 val; bool dataabort = !cpu->DataRead16(addr, &val); \
cpu->AddCycles_CDI_LDR(); \
if (dataabort) return; \
val = (s32)(s16)val; \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
cpu->JumpTo8_16Bit(val); \
else \
{ \
cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->SetCycles_L((cpu->CurInstr>>12) & 0xF, 2, cpu->ILT_Norm); \
} \
if (((cpu->CurInstr>>12) & 0xF) == 15) cpu->JumpTo8_16Bit(val); \
else cpu->R[(cpu->CurInstr>>12) & 0xF] = val; \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
@ -452,8 +398,8 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH)
void A_SWP(ARM* cpu)
{
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1);
u32 base = cpu->GetReg((cpu->CurInstr >> 16) & 0xF);
u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rm = cpu->R[cpu->CurInstr & 0xF];
if ((cpu->CurInstr & 0xF) == 15) rm += 4;
u32 val;
@ -462,38 +408,20 @@ void A_SWP(ARM* cpu)
u32 numD = cpu->DataCycles;
if (cpu->DataWrite32(base, rm))
{
cpu->AddCycles_CDI_SWP();
// rd only gets updated if both read and write succeed
u32 rd = (cpu->CurInstr >> 12) & 0xF;
if (rd != 15)
{
cpu->R[rd] = ROR(val, 8*(base&0x3));
if (cpu->Num == 0)
{
u32 cycles;
if (base & 3) // add an extra interlock cycle when doing a misaligned load from a non-itcm address (checkme: does it matter whether you're executing from there?)
{
cycles = ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2;
}
else cycles = 1;
cpu->SetCycles_L(rd, cycles, cpu->ILT_Norm);
}
}
else if (cpu->Num == 1) // for some reason these jumps don't work on the arm 9?
cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1, cpu->ILT_Norm);
if (rd != 15) cpu->R[rd] = ROR(val, 8*(base&0x3));
else if (cpu->Num==1) cpu->JumpTo(ROR(val, 8*(base&0x3)) & ~1); // for some reason these jumps don't work on the arm 9?
}
else cpu->AddCycles_CDI_SWP();
cpu->DataCycles += numD;
}
else cpu->AddCycles_CDI_SWP();
cpu->AddCycles_CDI_SWP();
}
void A_SWPB(ARM* cpu)
{
u32 rm = cpu->GetReg(cpu->CurInstr & 0xF, 1) & 0xFF;
u32 base = cpu->GetReg((cpu->CurInstr >> 16) & 0xF);
u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rm = cpu->R[cpu->CurInstr & 0xF] & 0xFF;
if ((cpu->CurInstr & 0xF) == 15) rm += 4;
u32 val;
@ -502,24 +430,14 @@ void A_SWPB(ARM* cpu)
u32 numD = cpu->DataCycles;
if (cpu->DataWrite8(base, rm))
{
cpu->AddCycles_CDI_SWP();
// rd only gets updated if both read and write succeed
u32 rd = (cpu->CurInstr >> 12) & 0xF;
if (rd != 15)
{
cpu->R[rd] = val;
// add an extra interlock cycle when doing a load from a non-itcm address (checkme: does it matter whether you're executing from there?)
if (cpu->Num == 0)
cpu->SetCycles_L(rd, ((base < ((ARMv5*)cpu)->ITCMSize) && ((cpu->R[15]-8) < ((ARMv5*)cpu)->ITCMSize)) ? 1 : 2, cpu->ILT_Norm);
}
else if (cpu->Num == 1)// for some reason these jumps don't work on the arm 9?
cpu->JumpTo(val & ~1);
if (rd != 15) cpu->R[rd] = val;
else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't work on the arm 9?
}
else cpu->AddCycles_CDI_SWP();
cpu->DataCycles += numD;
}
else cpu->AddCycles_CDI_SWP();
cpu->AddCycles_CDI_SWP();
}
@ -527,12 +445,11 @@ void A_SWPB(ARM* cpu)
void A_LDM(ARM* cpu)
{
u32 baseid = (cpu->CurInstr >> 16) & 0xF;
u32 base = cpu->GetReg(baseid, 1);
u32 base = cpu->R[baseid];
u32 wbbase;
u32 oldbase = base;
u32 preinc = (cpu->CurInstr & (1<<24));
bool first = true;
u32 lastreg = 0; // TODO: this doesn't support 0 reg LDMs (do those even work?)
if (!(cpu->CurInstr & (1<<23))) // decrement
{
@ -568,7 +485,6 @@ void A_LDM(ARM* cpu)
}
first = false;
lastreg = i;
if (!preinc) base += 4;
}
}
@ -582,26 +498,12 @@ void A_LDM(ARM* cpu)
{
goto dataabort;
}
cpu->AddCycles_CDI_LDM();
if (!preinc) base += 4;
if (cpu->Num == 1)
pc &= ~0x1;
}
else
{
cpu->AddCycles_CDI_LDM();
if (cpu->Num == 0)
{
u32 lastbase = base;
if (!preinc) lastbase -= 4;
// no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?)
if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg))))
cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm);
}
}
// switch back to previous regs
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
@ -635,8 +537,6 @@ void A_LDM(ARM* cpu)
if (false)
{
dataabort:
cpu->AddCycles_CDI_LDM();
// CHECKME: interlock shouldn't apply when it data aborts, right?
// switch back to original set of regs
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
@ -645,12 +545,14 @@ void A_LDM(ARM* cpu)
// restore original value of base in case the reg got written to
cpu->R[baseid] = oldbase;
}
cpu->AddCycles_CDI_LDM();
}
void A_STM(ARM* cpu)
{
u32 baseid = (cpu->CurInstr >> 16) & 0xF;
u32 base = cpu->GetReg(baseid, 1);
u32 base = cpu->R[baseid];
u32 oldbase = base;
u32 preinc = (cpu->CurInstr & (1<<24));
bool first = true;
@ -694,7 +596,7 @@ void A_STM(ARM* cpu)
val = oldbase;
else val = base;
}
else val = cpu->GetReg(i, 1+cpu->DataCycles);
else val = cpu->R[i];
if (i == 15) val+=4;
@ -738,170 +640,160 @@ void A_STM(ARM* cpu)
// T_LDR_PCREL: Thumb PC-relative word load. The address is R15 with bit 1 cleared plus the
// low 8 opcode bits scaled by 4; the word is read straight into the register selected by
// bits 8-10 (the read's success result is not checked here), then AddCycles_CDI_LDR is charged.
// Diff view: the signature carrying the "checkme" note and the SetCycles_L() call are the
// removed interlock lines; the plain signature is the restored one.
void T_LDR_PCREL(ARM* cpu) // checkme: can pc be interlocked?
void T_LDR_PCREL(ARM* cpu)
{
u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2);
cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]);
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count
}
// T_STR_REG: Thumb register-offset word store. The address is the sum of the registers in
// bits 3-5 and bits 6-8; the register in bits 0-2 is written as 32 bits, then
// AddCycles_CD_STR is charged.
// Diff view: the signature appears twice (removed and restored copies); the GetReg() lines are
// the removed interlock form, the cpu->R[] lines the restored one.
void T_STR_REG(ARM* cpu)
void T_STR_REG(ARM* cpu)
{
u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
cpu->DataWrite32(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1));
u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7];
cpu->DataWrite32(addr, cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CD_STR();
}
// T_STRB_REG: Thumb register-offset byte store. The address is the sum of the registers in
// bits 3-5 and bits 6-8; the register in bits 0-2 is passed to DataWrite8, then
// AddCycles_CD_STR is charged.
// Diff view: the GetReg() lines are the removed interlock form, the cpu->R[] lines the restored one.
void T_STRB_REG(ARM* cpu)
{
u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
cpu->DataWrite8(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1));
u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7];
cpu->DataWrite8(addr, cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CD_STR();
}
// T_LDR_REG: Thumb register-offset word load. The address is the sum of the registers in
// bits 3-5 and bits 6-8. The destination (bits 0-2) is only written when the read succeeds,
// and the loaded word is rotated right by 8 * (addr & 3) so unaligned addresses yield a
// rotated value. AddCycles_CDI_LDR is charged either way.
// Diff view: the GetReg() address line and the SetCycles_L() call are the removed interlock
// lines; the cpu->R[] address line is the restored one.
void T_LDR_REG(ARM* cpu)
{
u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7];
u32 val;
if (cpu->DataRead32(addr, &val))
cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(addr&0x3));
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L(cpu->CurInstr & 0x7, (addr & 3) ? 2 : 1, cpu->ILT_Norm);
}
// T_LDRB_REG: Thumb register-offset byte load. The address is the sum of the registers in
// bits 3-5 and bits 6-8; DataRead8 is handed the destination register (bits 0-2) directly,
// then AddCycles_CDI_LDR is charged.
// Diff view: the GetReg() address line and the SetCycles_L() call are the removed interlock
// lines; the cpu->R[] address line is the restored one.
void T_LDRB_REG(ARM* cpu)
{
u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7];
cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm);
}
// T_STRH_REG: Thumb register-offset halfword store. The address is the sum of the registers
// in bits 3-5 and bits 6-8; the register in bits 0-2 is passed to DataWrite16, then
// AddCycles_CD_STR is charged.
// Diff view: the GetReg() lines are the removed interlock form, the cpu->R[] lines the restored one.
void T_STRH_REG(ARM* cpu)
{
u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
cpu->DataWrite16(addr, cpu->GetReg(cpu->CurInstr & 0x7, 1));
u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7];
cpu->DataWrite16(addr, cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CD_STR();
}
// T_LDRSB_REG: Thumb register-offset sign-extending byte load. The address is the sum of the
// registers in bits 3-5 and bits 6-8. The byte is read into the destination (bits 0-2) and,
// only when the read succeeds, sign-extended in place from 8 to 32 bits.
// AddCycles_CDI_LDR is charged either way.
// Diff view: the GetReg() address line and the SetCycles_L() call are the removed interlock
// lines; the cpu->R[] address line is the restored one.
void T_LDRSB_REG(ARM* cpu)
{
u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7];
if (cpu->DataRead8(addr, &cpu->R[cpu->CurInstr & 0x7]))
cpu->R[cpu->CurInstr & 0x7] = (s32)(s8)cpu->R[cpu->CurInstr & 0x7];
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm);
}
// T_LDRH_REG: Thumb register-offset halfword load. The address is the sum of the registers in
// bits 3-5 and bits 6-8; DataRead16 is handed the destination register (bits 0-2) directly,
// then AddCycles_CDI_LDR is charged.
// Diff view: the GetReg() address line and the SetCycles_L() call are the removed interlock
// lines; the cpu->R[] address line is the restored one.
void T_LDRH_REG(ARM* cpu)
{
u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7];
cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm);
}
// T_LDRSH_REG: Thumb register-offset sign-extending halfword load. The address is the sum of
// the registers in bits 3-5 and bits 6-8. The halfword is read into the destination (bits 0-2)
// and, only when the read succeeds, sign-extended in place from 16 to 32 bits.
// AddCycles_CDI_LDR is charged either way.
// Diff view: the GetReg() address line and the SetCycles_L() call are the removed interlock
// lines; the cpu->R[] address line is the restored one.
void T_LDRSH_REG(ARM* cpu)
{
u32 addr = cpu->GetReg((cpu->CurInstr >> 3) & 0x7) + cpu->GetReg((cpu->CurInstr >> 6) & 0x7);
u32 addr = cpu->R[(cpu->CurInstr >> 3) & 0x7] + cpu->R[(cpu->CurInstr >> 6) & 0x7];
if (cpu->DataRead16(addr, &cpu->R[cpu->CurInstr & 0x7]))
cpu->R[cpu->CurInstr & 0x7] = (s32)(s16)cpu->R[cpu->CurInstr & 0x7];
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm);
}
// T_STR_IMM: Thumb immediate-offset word store. The offset is (CurInstr >> 4) & 0x7C, i.e. a
// word-scaled immediate, added to the base register in bits 3-5; the register in bits 0-2 is
// written as 32 bits.
// Diff view: the GetReg() lines followed by AddCycles_CD_STR() are the removed interlock form;
// the cpu->R[] lines followed by AddCycles_CD_LDR() are the restored form.
// NOTE(review): the restored version charges AddCycles_CD_LDR() although this is a store and
// every other store handler visible here calls AddCycles_CD_STR() - confirm this is intended
// and that AddCycles_CD_LDR is still declared after the revert.
void T_STR_IMM(ARM* cpu)
{
u32 offset = (cpu->CurInstr >> 4) & 0x7C;
offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
offset += cpu->R[(cpu->CurInstr >> 3) & 0x7];
cpu->DataWrite32(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1));
cpu->AddCycles_CD_STR();
cpu->DataWrite32(offset, cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CD_LDR();
}
// T_LDR_IMM: Thumb immediate-offset word load. The offset is (CurInstr >> 4) & 0x7C added to
// the base register in bits 3-5. The destination (bits 0-2) is only written when the read
// succeeds, with the word rotated right by 8 * (address & 3). AddCycles_CDI_LDR is charged
// either way.
// Diff view: the GetReg() line and the SetCycles_L() call are the removed interlock lines;
// the cpu->R[] line is the restored one.
void T_LDR_IMM(ARM* cpu)
{
u32 offset = (cpu->CurInstr >> 4) & 0x7C;
offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
offset += cpu->R[(cpu->CurInstr >> 3) & 0x7];
u32 val;
if (cpu->DataRead32(offset, &val))
cpu->R[cpu->CurInstr & 0x7] = ROR(val, 8*(offset&0x3));
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L(cpu->CurInstr & 0x7, (offset & 3) ? 2 : 1, cpu->ILT_Norm);
}
// T_STRB_IMM: Thumb immediate-offset byte store. The offset is the 5-bit immediate in
// bits 6-10 (unscaled) added to the base register in bits 3-5; the register in bits 0-2 is
// passed to DataWrite8, then AddCycles_CD_STR is charged.
// Diff view: the GetReg() lines are the removed interlock form, the cpu->R[] lines the restored one.
void T_STRB_IMM(ARM* cpu)
{
u32 offset = (cpu->CurInstr >> 6) & 0x1F;
offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
offset += cpu->R[(cpu->CurInstr >> 3) & 0x7];
cpu->DataWrite8(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1));
cpu->DataWrite8(offset, cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CD_STR();
}
// T_LDRB_IMM: Thumb immediate-offset byte load. The offset is the 5-bit immediate in
// bits 6-10 (unscaled) added to the base register in bits 3-5; DataRead8 is handed the
// destination register (bits 0-2) directly, then AddCycles_CDI_LDR is charged.
// Diff view: the GetReg() line and the SetCycles_L() call are the removed interlock lines;
// the cpu->R[] line is the restored one.
void T_LDRB_IMM(ARM* cpu)
{
u32 offset = (cpu->CurInstr >> 6) & 0x1F;
offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
offset += cpu->R[(cpu->CurInstr >> 3) & 0x7];
cpu->DataRead8(offset, &cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm);
}
// T_STRH_IMM: Thumb immediate-offset halfword store. The offset is (CurInstr >> 5) & 0x3E,
// i.e. a halfword-scaled immediate, added to the base register in bits 3-5; the register in
// bits 0-2 is passed to DataWrite16, then AddCycles_CD_STR is charged.
// Diff view: the GetReg() lines are the removed interlock form, the cpu->R[] lines the restored one.
void T_STRH_IMM(ARM* cpu)
{
u32 offset = (cpu->CurInstr >> 5) & 0x3E;
offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
offset += cpu->R[(cpu->CurInstr >> 3) & 0x7];
cpu->DataWrite16(offset, cpu->GetReg(cpu->CurInstr & 0x7, 1));
cpu->DataWrite16(offset, cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CD_STR();
}
// T_LDRH_IMM: Thumb immediate-offset halfword load. The offset is (CurInstr >> 5) & 0x3E
// added to the base register in bits 3-5; DataRead16 is handed the destination register
// (bits 0-2) directly, then AddCycles_CDI_LDR is charged.
// Diff view: the GetReg() line and the SetCycles_L() call are the removed interlock lines;
// the cpu->R[] line is the restored one.
void T_LDRH_IMM(ARM* cpu)
{
u32 offset = (cpu->CurInstr >> 5) & 0x3E;
offset += cpu->GetReg((cpu->CurInstr >> 3) & 0x7);
offset += cpu->R[(cpu->CurInstr >> 3) & 0x7];
cpu->DataRead16(offset, &cpu->R[cpu->CurInstr & 0x7]);
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L(cpu->CurInstr & 0x7, 2, cpu->ILT_Norm);
}
// T_STR_SPREL: Thumb SP-relative word store. The offset is (CurInstr << 2) & 0x3FC, i.e. the
// low 8 opcode bits scaled by 4, added to R13; the register in bits 8-10 is written as
// 32 bits, then AddCycles_CD_STR is charged. R13 is read directly in both versions.
// Diff view: the signature carrying the "checkme" note and the GetReg() store line are the
// removed interlock lines; the plain signature and the cpu->R[] store line are the restored ones.
void T_STR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode?
void T_STR_SPREL(ARM* cpu)
{
u32 offset = (cpu->CurInstr << 2) & 0x3FC;
offset += cpu->R[13];
cpu->DataWrite32(offset, cpu->GetReg((cpu->CurInstr >> 8) & 0x7, 1));
cpu->DataWrite32(offset, cpu->R[(cpu->CurInstr >> 8) & 0x7]);
cpu->AddCycles_CD_STR();
}
// T_LDR_SPREL: Thumb SP-relative word load. The offset is (CurInstr << 2) & 0x3FC, i.e. the
// low 8 opcode bits scaled by 4, added to R13; the word is read straight into the register
// selected by bits 8-10 (the read's success result is not checked here), then
// AddCycles_CDI_LDR is charged.
// Diff view: the signature carrying the "checkme" note and the SetCycles_L() call are the
// removed interlock lines; the plain signature is the restored one.
void T_LDR_SPREL(ARM* cpu) // checkme: can sp be interlocked in thumb mode?
void T_LDR_SPREL(ARM* cpu)
{
u32 offset = (cpu->CurInstr << 2) & 0x3FC;
offset += cpu->R[13];
cpu->DataRead32(offset, &cpu->R[(cpu->CurInstr >> 8) & 0x7]);
cpu->AddCycles_CDI_LDR();
cpu->SetCycles_L((cpu->CurInstr >> 8) & 0x7, 1, cpu->ILT_Norm); // checkme: verify cycle count
}
@ -919,7 +811,7 @@ void T_PUSH(ARM* cpu)
if (cpu->CurInstr & (1<<8))
nregs++;
u32 base = cpu->GetReg(13);
u32 base = cpu->R[13];
base -= (nregs<<2);
u32 wbbase = base;
@ -927,8 +819,8 @@ void T_PUSH(ARM* cpu)
{
if (cpu->CurInstr & (1<<i))
{
if (!(first ? cpu->DataWrite32 (base, cpu->GetReg(i, 1))
: cpu->DataWrite32S(base, cpu->GetReg(i, 1)))) // verify interlock
if (!(first ? cpu->DataWrite32 (base, cpu->R[i])
: cpu->DataWrite32S(base, cpu->R[i])))
{
goto dataabort;
}
@ -952,11 +844,10 @@ void T_PUSH(ARM* cpu)
cpu->AddCycles_CD_STM();
}
void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode?
void T_POP(ARM* cpu)
{
u32 base = cpu->R[13];
bool first = true;
u32 lastreg = 0;
for (int i = 0; i < 8; i++)
{
@ -986,16 +877,6 @@ void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode?
}
cpu->R[13] = base;
cpu->AddCycles_CDI_LDM();
if (cpu->Num == 0)
{
u32 lastbase = base - 4;
// no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?)
if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg))))
cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm);
}
return;
dataabort:
cpu->AddCycles_CDI_LDM();
@ -1003,15 +884,15 @@ void T_POP(ARM* cpu) // checkme: can sp be interlocked in thumb mode?
void T_STMIA(ARM* cpu)
{
u32 base = cpu->GetReg((cpu->CurInstr >> 8) & 0x7);
u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7];
bool first = true;
for (int i = 0; i < 8; i++)
{
if (cpu->CurInstr & (1<<i))
{
if (!(first ? cpu->DataWrite32 (base, cpu->GetReg(i, 1))
: cpu->DataWrite32S(base, cpu->GetReg(i, 1))))
if (!(first ? cpu->DataWrite32 (base, cpu->R[i])
: cpu->DataWrite32S(base, cpu->R[i])))
{
goto dataabort;
}
@ -1028,9 +909,8 @@ void T_STMIA(ARM* cpu)
void T_LDMIA(ARM* cpu)
{
u32 base = cpu->GetReg((cpu->CurInstr >> 8) & 0x7);
u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7];
bool first = true;
u32 lastreg = 0;
for (int i = 0; i < 8; i++)
{
@ -1043,23 +923,11 @@ void T_LDMIA(ARM* cpu)
}
first = false;
base += 4;
lastreg = i;
}
}
if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7))))
cpu->R[(cpu->CurInstr >> 8) & 0x7] = base;
cpu->AddCycles_CDI_LDM();
if (cpu->Num == 0)
{
u32 lastbase = base - 4;
// no interlock occurs when loading from itcm (checkme: does it matter whether you're executing from there?)
if ((((ARMv5*)cpu)->ITCMSize < lastbase) && ((cpu->R[15]-8) > ((ARMv5*)cpu)->ITCMSize) && (cpu->CurInstr & (0x7FFF >> (15 - lastreg))))
cpu->SetCycles_L(lastreg, 1, cpu->ILT_Norm);
}
return;
dataabort:
cpu->AddCycles_CDI_LDM();