This commit is contained in:
Jakly 2024-11-12 12:56:34 +01:00 committed by GitHub
commit 30d1bbec94
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 1074 additions and 472 deletions

View File

@ -222,7 +222,7 @@ void ARM::DoSavestate(Savestate* file)
file->VarArray(R_ABT, 3*sizeof(u32));
file->VarArray(R_IRQ, 3*sizeof(u32));
file->VarArray(R_UND, 3*sizeof(u32));
file->Var32(&CurInstr);
file->Var64(&CurInstr);
#ifdef JIT_ENABLED
if (file->Saving && NDS.IsJITEnabled())
{
@ -232,7 +232,7 @@ void ARM::DoSavestate(Savestate* file)
FillPipeline();
}
#endif
file->VarArray(NextInstr, 2*sizeof(u32));
file->VarArray(NextInstr, 2*sizeof(u64));
file->Var32(&ExceptionBase);
@ -344,12 +344,6 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
CPSR &= ~0x20;
}
if (!(PU_Map[addr>>12] & 0x04))
{
PrefetchAbort();
return;
}
NDS.MonitorARM9Jump(addr);
}
@ -518,6 +512,7 @@ void ARM::UpdateMode(u32 oldmode, u32 newmode, bool phony)
}
}
template <CPUExecuteMode mode>
void ARM::TriggerIRQ()
{
if (CPSR & 0x80)
@ -529,7 +524,12 @@ void ARM::TriggerIRQ()
UpdateMode(oldcpsr, CPSR);
R_IRQ[2] = oldcpsr;
R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0);
#ifdef JIT_ENABLED
if constexpr (mode == CPUExecuteMode::JIT)
R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0);
else
#endif
R[14] = R[15] - (oldcpsr & 0x20 ? 0 : 4);
JumpTo(ExceptionBase + 0x18);
// ARDS cheat support
@ -540,6 +540,11 @@ void ARM::TriggerIRQ()
NDS.AREngine.RunCheats();
}
}
template void ARM::TriggerIRQ<CPUExecuteMode::Interpreter>();
template void ARM::TriggerIRQ<CPUExecuteMode::InterpreterGDB>();
#ifdef JIT_ENABLED
template void ARM::TriggerIRQ<CPUExecuteMode::JIT>();
#endif
void ARMv5::PrefetchAbort()
{
@ -550,17 +555,8 @@ void ARMv5::PrefetchAbort()
CPSR |= 0x97;
UpdateMode(oldcpsr, CPSR);
// this shouldn't happen, but if it does, we're stuck in some nasty endless loop
// so better take care of it
if (!(PU_Map[ExceptionBase>>12] & 0x04))
{
Log(LogLevel::Error, "!!!!! EXCEPTION REGION NOT EXECUTABLE. THIS IS VERY BAD!!\n");
NDS.Stop(Platform::StopReason::BadExceptionRegion);
return;
}
R_ABT[2] = oldcpsr;
R[14] = R[15] + (oldcpsr & 0x20 ? 2 : 0);
R[14] = R[15] - (oldcpsr & 0x20 ? 0 : 4);
JumpTo(ExceptionBase + 0x0C);
}
@ -599,7 +595,13 @@ void ARMv5::Execute()
{
Halted = 0;
if (NDS.IME[0] & 0x1)
TriggerIRQ();
{
#ifdef JIT_ENABLED
if constexpr (mode == CPUExecuteMode::JIT) TriggerIRQ<mode>();
else
#endif
IRQ = 1;
}
}
else
{
@ -634,7 +636,7 @@ void ARMv5::Execute()
{
// this order is crucial otherwise idle loops waiting for an IRQ won't function
if (IRQ)
TriggerIRQ();
TriggerIRQ<mode>();
if (Halted || IdleLoop)
{
@ -662,10 +664,18 @@ void ARMv5::Execute()
NextInstr[0] = NextInstr[1];
if (R[15] & 0x2) { NextInstr[1] >>= 16; CodeCycles = 0; }
else NextInstr[1] = CodeRead32(R[15], false);
// actually execute
u32 icode = (CurInstr >> 6) & 0x3FF;
ARMInterpreter::THUMBInstrTable[icode](this);
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>();
else if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions
{
PrefetchAbort();
}
else [[likely]] // actually execute
{
u32 icode = (CurInstr >> 6) & 0x3FF;
ARMInterpreter::THUMBInstrTable[icode](this);
}
}
else
{
@ -677,9 +687,14 @@ void ARMv5::Execute()
CurInstr = NextInstr[0];
NextInstr[0] = NextInstr[1];
NextInstr[1] = CodeRead32(R[15], false);
// actually execute
if (CheckCondition(CurInstr >> 28))
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>();
else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions
{
PrefetchAbort();
}
else if (CheckCondition(CurInstr >> 28)) [[likely]] // actually execute
{
u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0);
ARMInterpreter::ARMInstrTable[icode](this);
@ -688,6 +703,10 @@ void ARMv5::Execute()
{
ARMInterpreter::A_BLX_IMM(this);
}
else if ((CurInstr & 0x0FF000F0) == 0x01200070)
{
ARMInterpreter::A_BKPT(this); // always passes regardless of condition code
}
else
AddCycles_C();
}
@ -704,10 +723,8 @@ void ARMv5::Execute()
/*if (NDS::IF[0] & NDS::IE[0])
{
if (NDS::IME[0] & 0x1)
TriggerIRQ();
TriggerIRQ<mode>();
}*/
if (IRQ) TriggerIRQ();
}
NDS.ARM9Timestamp += Cycles;
@ -739,7 +756,10 @@ void ARMv4::Execute()
{
Halted = 0;
if (NDS.IME[1] & 0x1)
TriggerIRQ();
{
if constexpr (mode == CPUExecuteMode::JIT) TriggerIRQ<mode>();
else IRQ = 1;
}
}
else
{
@ -773,7 +793,7 @@ void ARMv4::Execute()
if (StopExecution)
{
if (IRQ)
TriggerIRQ();
TriggerIRQ<mode>();
if (Halted || IdleLoop)
{
@ -801,9 +821,13 @@ void ARMv4::Execute()
NextInstr[0] = NextInstr[1];
NextInstr[1] = CodeRead16(R[15]);
// actually execute
u32 icode = (CurInstr >> 6);
ARMInterpreter::THUMBInstrTable[icode](this);
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>();
else
{
// actually execute
u32 icode = (CurInstr >> 6);
ARMInterpreter::THUMBInstrTable[icode](this);
}
}
else
{
@ -816,8 +840,8 @@ void ARMv4::Execute()
NextInstr[0] = NextInstr[1];
NextInstr[1] = CodeRead32(R[15]);
// actually execute
if (CheckCondition(CurInstr >> 28))
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>();
else if (CheckCondition(CurInstr >> 28)) // actually execute
{
u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0);
ARMInterpreter::ARMInstrTable[icode](this);
@ -838,9 +862,8 @@ void ARMv4::Execute()
/*if (NDS::IF[1] & NDS::IE[1])
{
if (NDS::IME[1] & 0x1)
TriggerIRQ();
TriggerIRQ<mode>();
}*/
if (IRQ) TriggerIRQ();
}
NDS.ARM7Timestamp += Cycles;
@ -1113,70 +1136,78 @@ u32 ARMv5::ReadMem(u32 addr, int size)
}
#endif
// ARMv4 8-bit data read.
// Fetches the value at addr through BusRead8 and stores it in *val, records
// addr in DataRegion, and sets DataCycles from entry [0] of the
// NDS.ARM7MemTimings row selected by addr >> 15.
// Always returns true; this implementation has no failure path.
void ARMv4::DataRead8(u32 addr, u32* val)
bool ARMv4::DataRead8(u32 addr, u32* val)
{
*val = BusRead8(addr);
DataRegion = addr;
DataCycles = NDS.ARM7MemTimings[addr >> 15][0];
return true;
}
// ARMv4 16-bit data read.
// Clears bit 0 of addr (forcing halfword alignment), fetches the value through
// BusRead16 into *val, records the aligned address in DataRegion, and sets
// DataCycles from entry [0] of the NDS.ARM7MemTimings row selected by addr >> 15.
// Always returns true; this implementation has no failure path.
void ARMv4::DataRead16(u32 addr, u32* val)
bool ARMv4::DataRead16(u32 addr, u32* val)
{
addr &= ~1;
*val = BusRead16(addr);
DataRegion = addr;
DataCycles = NDS.ARM7MemTimings[addr >> 15][0];
return true;
}
// ARMv4 32-bit data read.
// Clears the low two bits of addr (forcing word alignment), fetches the value
// through BusRead32 into *val, records the aligned address in DataRegion, and
// sets DataCycles from entry [2] of the NDS.ARM7MemTimings row selected by
// addr >> 15.
// Always returns true; this implementation has no failure path.
void ARMv4::DataRead32(u32 addr, u32* val)
bool ARMv4::DataRead32(u32 addr, u32* val)
{
addr &= ~3;
*val = BusRead32(addr);
DataRegion = addr;
DataCycles = NDS.ARM7MemTimings[addr >> 15][2];
return true;
}
// ARMv4 32-bit data read, "S" variant (presumably the sequential follow-up
// access of a multi-word transfer -- confirm against callers).
// Clears the low two bits of addr, fetches the value through BusRead32 into
// *val, and ADDS entry [3] of the NDS.ARM7MemTimings row selected by addr >> 15
// to DataCycles instead of overwriting it. Unlike DataRead32, DataRegion is
// left untouched.
// Always returns true; this implementation has no failure path.
void ARMv4::DataRead32S(u32 addr, u32* val)
bool ARMv4::DataRead32S(u32 addr, u32* val)
{
addr &= ~3;
*val = BusRead32(addr);
DataCycles += NDS.ARM7MemTimings[addr >> 15][3];
return true;
}
// ARMv4 8-bit data write.
// Stores val at addr through BusWrite8, records addr in DataRegion, and sets
// DataCycles from entry [0] of the NDS.ARM7MemTimings row selected by
// addr >> 15.
// Always returns true; this implementation has no failure path.
void ARMv4::DataWrite8(u32 addr, u8 val)
bool ARMv4::DataWrite8(u32 addr, u8 val)
{
BusWrite8(addr, val);
DataRegion = addr;
DataCycles = NDS.ARM7MemTimings[addr >> 15][0];
return true;
}
// ARMv4 16-bit data write.
// Clears bit 0 of addr (forcing halfword alignment), stores val through
// BusWrite16, records the aligned address in DataRegion, and sets DataCycles
// from entry [0] of the NDS.ARM7MemTimings row selected by addr >> 15.
// Always returns true; this implementation has no failure path.
void ARMv4::DataWrite16(u32 addr, u16 val)
bool ARMv4::DataWrite16(u32 addr, u16 val)
{
addr &= ~1;
BusWrite16(addr, val);
DataRegion = addr;
DataCycles = NDS.ARM7MemTimings[addr >> 15][0];
return true;
}
// ARMv4 32-bit data write.
// Clears the low two bits of addr (forcing word alignment), stores val through
// BusWrite32, records the aligned address in DataRegion, and sets DataCycles
// from entry [2] of the NDS.ARM7MemTimings row selected by addr >> 15.
// Always returns true; this implementation has no failure path.
void ARMv4::DataWrite32(u32 addr, u32 val)
bool ARMv4::DataWrite32(u32 addr, u32 val)
{
addr &= ~3;
BusWrite32(addr, val);
DataRegion = addr;
DataCycles = NDS.ARM7MemTimings[addr >> 15][2];
return true;
}
// ARMv4 32-bit data write, "S" variant (presumably the sequential follow-up
// access of a multi-word transfer -- confirm against callers).
// Clears the low two bits of addr, stores val through BusWrite32, and ADDS
// entry [3] of the NDS.ARM7MemTimings row selected by addr >> 15 to DataCycles
// instead of overwriting it. Unlike DataWrite32, DataRegion is left untouched.
// Always returns true; this implementation has no failure path.
void ARMv4::DataWrite32S(u32 addr, u32 val)
bool ARMv4::DataWrite32S(u32 addr, u32 val)
{
addr &= ~3;
BusWrite32(addr, val);
DataCycles += NDS.ARM7MemTimings[addr >> 15][3];
return true;
}

View File

@ -128,19 +128,20 @@ public:
void UpdateMode(u32 oldmode, u32 newmode, bool phony = false);
template <CPUExecuteMode mode>
void TriggerIRQ();
void SetupCodeMem(u32 addr);
virtual void DataRead8(u32 addr, u32* val) = 0;
virtual void DataRead16(u32 addr, u32* val) = 0;
virtual void DataRead32(u32 addr, u32* val) = 0;
virtual void DataRead32S(u32 addr, u32* val) = 0;
virtual void DataWrite8(u32 addr, u8 val) = 0;
virtual void DataWrite16(u32 addr, u16 val) = 0;
virtual void DataWrite32(u32 addr, u32 val) = 0;
virtual void DataWrite32S(u32 addr, u32 val) = 0;
virtual bool DataRead8(u32 addr, u32* val) = 0;
virtual bool DataRead16(u32 addr, u32* val) = 0;
virtual bool DataRead32(u32 addr, u32* val) = 0;
virtual bool DataRead32S(u32 addr, u32* val) = 0;
virtual bool DataWrite8(u32 addr, u8 val) = 0;
virtual bool DataWrite16(u32 addr, u16 val) = 0;
virtual bool DataWrite32(u32 addr, u32 val) = 0;
virtual bool DataWrite32S(u32 addr, u32 val) = 0;
virtual void AddCycles_C() = 0;
virtual void AddCycles_CI(s32 numI) = 0;
@ -176,8 +177,8 @@ public:
u32 R_ABT[3];
u32 R_IRQ[3];
u32 R_UND[3];
u32 CurInstr;
u32 NextInstr[2];
u64 CurInstr;
u64 NextInstr[2];
u32 ExceptionBase;
@ -250,16 +251,16 @@ public:
void Execute();
// all code accesses are forced nonseq 32bit
u32 CodeRead32(u32 addr, bool branch);
u64 CodeRead32(u32 addr, bool branch);
void DataRead8(u32 addr, u32* val) override;
void DataRead16(u32 addr, u32* val) override;
void DataRead32(u32 addr, u32* val) override;
void DataRead32S(u32 addr, u32* val) override;
void DataWrite8(u32 addr, u8 val) override;
void DataWrite16(u32 addr, u16 val) override;
void DataWrite32(u32 addr, u32 val) override;
void DataWrite32S(u32 addr, u32 val) override;
bool DataRead8(u32 addr, u32* val) override;
bool DataRead16(u32 addr, u32* val) override;
bool DataRead32(u32 addr, u32* val) override;
bool DataRead32S(u32 addr, u32* val) override;
bool DataWrite8(u32 addr, u8 val) override;
bool DataWrite16(u32 addr, u16 val) override;
bool DataWrite32(u32 addr, u32 val) override;
bool DataWrite32S(u32 addr, u32 val) override;
void AddCycles_C() override
{
@ -399,18 +400,19 @@ public:
return BusRead32(addr);
}
void DataRead8(u32 addr, u32* val) override;
void DataRead16(u32 addr, u32* val) override;
void DataRead32(u32 addr, u32* val) override;
void DataRead32S(u32 addr, u32* val) override;
void DataWrite8(u32 addr, u8 val) override;
void DataWrite16(u32 addr, u16 val) override;
void DataWrite32(u32 addr, u32 val) override;
void DataWrite32S(u32 addr, u32 val) override;
bool DataRead8(u32 addr, u32* val) override;
bool DataRead16(u32 addr, u32* val) override;
bool DataRead32(u32 addr, u32* val) override;
bool DataRead32S(u32 addr, u32* val) override;
bool DataWrite8(u32 addr, u8 val) override;
bool DataWrite16(u32 addr, u16 val) override;
bool DataWrite32(u32 addr, u32 val) override;
bool DataWrite32S(u32 addr, u32 val) override;
void AddCycles_C() override;
void AddCycles_CI(s32 num) override;
void AddCycles_CDI() override;
void AddCycles_CD() override;
protected:
u8 BusRead8(u32 addr) override;
u16 BusRead16(u32 addr) override;

View File

@ -69,6 +69,14 @@ void T_UNK(ARM* cpu)
cpu->JumpTo(cpu->ExceptionBase + 0x04);
}
// BKPT handler.
// On the core with Num == 1 the instruction is handed to the undefined
// instruction handler (A_UNK) and nothing else happens. On any other core a
// warning prefix is logged and a prefetch abort is raised through
// ARMv5::PrefetchAbort().
void A_BKPT(ARM* cpu)
{
    // Return right after A_UNK: falling through would reinterpret a non-ARMv5
    // core as ARMv5 in the cast below and raise a second exception on top of
    // the one A_UNK already entered.
    if (cpu->Num == 1) return A_UNK(cpu); // checkme

    Log(LogLevel::Warn, "BKPT: "); // combine with the prefetch abort warning message
    ((ARMv5*)cpu)->PrefetchAbort();
}
void A_MSR_IMM(ARM* cpu)
@ -90,7 +98,8 @@ void A_MSR_IMM(ARM* cpu)
case 0x1A:
case 0x1B: psr = &cpu->R_UND[2]; break;
default:
cpu->AddCycles_C();
if (cpu->Num != 1) cpu->AddCycles_C(); // arm 7
else cpu->AddCycles_CI(2); // arm 9
return;
}
}
@ -101,12 +110,9 @@ void A_MSR_IMM(ARM* cpu)
u32 mask = 0;
if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF;
if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00;
if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000;
if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000;
if (!(cpu->CurInstr & (1<<22)))
mask &= 0xFFFFFFDF;
//if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; // unused by arm 7 & 9
//if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9
if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000);
if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00;
@ -121,7 +127,26 @@ void A_MSR_IMM(ARM* cpu)
if (!(cpu->CurInstr & (1<<22)))
cpu->UpdateMode(oldpsr, cpu->CPSR);
cpu->AddCycles_C();
if (cpu->CPSR & 0x20) [[unlikely]]
{
if (cpu->Num == 0) cpu->R[15] += 2; // pc should actually increment by 4 one more time after switching to thumb mode without a pipeline flush, this gets the same effect.
else
{
Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n");
cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least
}
}
if (cpu->Num != 1)
{
if (cpu->CurInstr & (1<<22))
{
cpu->AddCycles_CI(2); // spsr
}
else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc
else cpu->AddCycles_C();
}
else cpu->AddCycles_C();
}
void A_MSR_REG(ARM* cpu)
@ -143,7 +168,8 @@ void A_MSR_REG(ARM* cpu)
case 0x1A:
case 0x1B: psr = &cpu->R_UND[2]; break;
default:
cpu->AddCycles_C();
if (cpu->Num != 1) cpu->AddCycles_C(); // arm 7
else cpu->AddCycles_CI(2); // arm 9
return;
}
}
@ -154,12 +180,9 @@ void A_MSR_REG(ARM* cpu)
u32 mask = 0;
if (cpu->CurInstr & (1<<16)) mask |= 0x000000FF;
if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00;
if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000;
if (cpu->CurInstr & (1<<19)) mask |= 0xFF000000;
if (!(cpu->CurInstr & (1<<22)))
mask &= 0xFFFFFFDF;
//if (cpu->CurInstr & (1<<17)) mask |= 0x0000FF00; // unused by arm 7 & 9
//if (cpu->CurInstr & (1<<18)) mask |= 0x00FF0000; // unused by arm 7 & 9
if (cpu->CurInstr & (1<<19)) mask |= ((cpu->Num==1) ? 0xF0000000 : 0xF8000000);
if ((cpu->CPSR & 0x1F) == 0x10) mask &= 0xFFFFFF00;
@ -174,7 +197,26 @@ void A_MSR_REG(ARM* cpu)
if (!(cpu->CurInstr & (1<<22)))
cpu->UpdateMode(oldpsr, cpu->CPSR);
cpu->AddCycles_C();
if (cpu->CPSR & 0x20) [[unlikely]]
{
if (cpu->Num == 0) cpu->R[15] += 2; // pc should actually increment by 4 one more time after switching to thumb mode without a pipeline flush, this gets the same effect.
else
{
Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n");
cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least
}
}
if (cpu->Num != 1)
{
if (cpu->CurInstr & (1<<22))
{
cpu->AddCycles_CI(2); // spsr
}
else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc
else cpu->AddCycles_C();
}
else cpu->AddCycles_C();
}
void A_MRS(ARM* cpu)
@ -201,8 +243,15 @@ void A_MRS(ARM* cpu)
else
psr = cpu->CPSR;
cpu->R[(cpu->CurInstr>>12) & 0xF] = psr;
cpu->AddCycles_C();
if (((cpu->CurInstr>>12) & 0xF) == 15)
{
if (cpu->Num == 1) // doesn't seem to jump on the arm9? checkme
cpu->JumpTo(psr & ~0x1); // checkme: this shouldn't be able to switch to thumb?
}
else cpu->R[(cpu->CurInstr>>12) & 0xF] = psr;
if (cpu->Num != 1) cpu->AddCycles_CI(1); // arm9
else cpu->AddCycles_C(); // arm7
}
@ -216,10 +265,12 @@ void A_MCR(ARM* cpu)
u32 cn = (cpu->CurInstr >> 16) & 0xF;
u32 cm = cpu->CurInstr & 0xF;
u32 cpinfo = (cpu->CurInstr >> 5) & 0x7;
u32 val = cpu->R[(cpu->CurInstr>>12)&0xF];
if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4;
if (cpu->Num==0 && cp==15)
{
((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, cpu->R[(cpu->CurInstr>>12)&0xF]);
((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, val);
}
else if (cpu->Num==1 && cp==14)
{
@ -244,10 +295,17 @@ void A_MRC(ARM* cpu)
u32 cn = (cpu->CurInstr >> 16) & 0xF;
u32 cm = cpu->CurInstr & 0xF;
u32 cpinfo = (cpu->CurInstr >> 5) & 0x7;
u32 rd = (cpu->CurInstr>>12) & 0xF;
if (cpu->Num==0 && cp==15)
{
cpu->R[(cpu->CurInstr>>12)&0xF] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo);
if (rd != 15) cpu->R[rd] = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo);
else
{
// r15 updates the top 4 bits of the cpsr, done to "allow for conditional branching based on coprocessor status"
u32 flags = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo) & 0xF0000000;
cpu->CPSR = (cpu->CPSR & ~0xF0000000) | flags;
}
}
else if (cpu->Num==1 && cp==14)
{
@ -259,12 +317,13 @@ void A_MRC(ARM* cpu)
return A_UNK(cpu); // TODO: check what kind of exception it really is
}
cpu->AddCycles_CI(2 + 1); // TODO: checkme
if (cpu->Num != 1) cpu->AddCycles_CI(1); // checkme
else cpu->AddCycles_CI(2 + 1); // TODO: checkme
}
void A_SVC(ARM* cpu)
void A_SVC(ARM* cpu) // A_SWI
{
u32 oldcpsr = cpu->CPSR;
cpu->CPSR &= ~0xBF;
@ -276,7 +335,7 @@ void A_SVC(ARM* cpu)
cpu->JumpTo(cpu->ExceptionBase + 0x08);
}
void T_SVC(ARM* cpu)
void T_SVC(ARM* cpu) // T_SWI
{
u32 oldcpsr = cpu->CPSR;
cpu->CPSR &= ~0xBF;

View File

@ -36,6 +36,7 @@ void A_MRS(ARM* cpu);
void A_MCR(ARM* cpu);
void A_MRC(ARM* cpu);
void A_SVC(ARM* cpu);
void A_BKPT(ARM* cpu);
void T_SVC(ARM* cpu);

View File

@ -19,6 +19,7 @@
#include <stdio.h>
#include "ARM.h"
#include "NDS.h"
#include "ARMInterpreter_MultiplySuperLLE.h"
namespace melonDS::ARMInterpreter
{
@ -581,8 +582,27 @@ A_IMPLEMENT_ALU_OP(RSC,)
#define A_TST(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \
if (cpu->Num == 1) \
{ \
cpu->SetNZ(res & 0x80000000, \
!res); \
u32 oldpsr = cpu->CPSR; \
cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \
if (cpu->CPSR & 0x20) \
{ \
Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TST T bit change on ARM7\n"); \
cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \
} \
} \
else cpu->JumpTo(res & ~1, true); /* TSTP dna, doesn't update flags */ \
} \
else \
{ \
cpu->SetNZ(res & 0x80000000, \
!res); \
} \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(TST,_S)
@ -591,8 +611,27 @@ A_IMPLEMENT_ALU_TEST(TST,_S)
#define A_TEQ(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a ^ b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \
if (cpu->Num == 1) \
{ \
cpu->SetNZ(res & 0x80000000, \
!res); \
u32 oldpsr = cpu->CPSR; \
cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \
if (cpu->CPSR & 0x20) \
{ \
Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: TEQ T bit change on ARM7\n"); \
cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \
} \
} \
else cpu->JumpTo(res & ~1, true); /* TEQP dna, doesn't update flags */ \
} \
else \
{ \
cpu->SetNZ(res & 0x80000000, \
!res); \
} \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(TEQ,_S)
@ -601,10 +640,31 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S)
#define A_CMP(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarrySub(a, b), \
OverflowSub(a, b)); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \
if (cpu->Num == 1) \
{ \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarrySub(a, b), \
OverflowSub(a, b)); \
u32 oldpsr = cpu->CPSR; \
cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \
if (cpu->CPSR & 0x20) \
{ \
Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMP T bit change on ARM7\n"); \
cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \
} \
} \
else cpu->JumpTo(res & ~1, true); /* CMPP dna, doesn't update flags */ \
} \
else \
{ \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarrySub(a, b), \
OverflowSub(a, b)); \
} \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(CMP,)
@ -613,10 +673,31 @@ A_IMPLEMENT_ALU_TEST(CMP,)
#define A_CMN(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b; \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarryAdd(a, b), \
OverflowAdd(a, b)); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \
if (cpu->Num == 1) \
{ \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarryAdd(a, b), \
OverflowAdd(a, b)); \
u32 oldpsr = cpu->CPSR; \
cpu->RestoreCPSR(); /* ARM7TDMI restores cpsr and does ___not___ flush the pipeline. */ \
if (cpu->CPSR & 0x20) \
{ \
Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: CMN T bit change on ARM7\n"); \
cpu->CPSR &= ~0x20; /* keep it from crashing the emulator at least */ \
} \
} \
else cpu->JumpTo(res & ~1, true); /* CMNP dna, doesn't update flags */ \
} \
else \
{ \
cpu->SetNZCV(res & 0x80000000, \
!res, \
CarryAdd(a, b), \
OverflowAdd(a, b)); \
} \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(CMN,)
@ -766,12 +847,14 @@ void A_MUL(ARM* cpu)
u32 res = rm * rs;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
// all multiply instructions fail writes to r15 on arm7/9
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
@ -783,6 +866,7 @@ void A_MUL(ARM* cpu)
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 3;
else cycles = 4;
if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, 0, cycles==4));
}
cpu->AddCycles_CI(cycles);
@ -795,13 +879,14 @@ void A_MLA(ARM* cpu)
u32 rn = cpu->R[(cpu->CurInstr >> 12) & 0xF];
u32 res = (rm * rs) + rn;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
@ -813,6 +898,7 @@ void A_MLA(ARM* cpu)
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(MULSCarry(rm, rs, rn, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -825,24 +911,27 @@ void A_UMULL(ARM* cpu)
u64 res = (u64)rm * (u64)rs;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2;
else
{
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(0, rm, rs, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -857,25 +946,28 @@ void A_UMLAL(ARM* cpu)
u64 rd = (u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL);
res += rd;
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2;
else
{
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(UMULLSCarry(rd, rm, rs, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -887,25 +979,28 @@ void A_SMULL(ARM* cpu)
u32 rs = cpu->R[(cpu->CurInstr >> 8) & 0xF];
s64 res = (s64)(s32)rm * (s64)(s32)rs;
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2;
else
{
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(0, rm, rs, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -920,25 +1015,28 @@ void A_SMLAL(ARM* cpu)
s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL));
res += rd;
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (cpu->CurInstr & (1<<20))
{
cpu->SetNZ((u32)(res >> 63ULL),
!res);
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 3 : 1;
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2;
else
{
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
if (cpu->CurInstr & (1<<20)) cpu->SetC(SMULLSCarry(rd, rm, rs, cycles==5));
}
cpu->AddCycles_CI(cycles);
@ -959,8 +1057,10 @@ void A_SMLAxy(ARM* cpu)
u32 res_mul = ((s16)rm * (s16)rs);
u32 res = res_mul + rn;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (OverflowAdd(res_mul, rn))
cpu->CPSR |= 0x08000000;
@ -980,8 +1080,9 @@ void A_SMLAWy(ARM* cpu)
u32 res_mul = ((s64)(s32)rm * (s16)rs) >> 16;
u32 res = res_mul + rn;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (OverflowAdd(res_mul, rn))
cpu->CPSR |= 0x08000000;
@ -1001,8 +1102,9 @@ void A_SMULxy(ARM* cpu)
else rs &= 0xFFFF;
u32 res = ((s16)rm * (s16)rs);
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
}
@ -1017,8 +1119,9 @@ void A_SMULWy(ARM* cpu)
else rs &= 0xFFFF;
u32 res = ((s64)(s32)rm * (s16)rs) >> 16;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
}
@ -1039,8 +1142,11 @@ void A_SMLALxy(ARM* cpu)
s64 rd = (s64)((u64)cpu->R[(cpu->CurInstr >> 12) & 0xF] | ((u64)cpu->R[(cpu->CurInstr >> 16) & 0xF] << 32ULL));
res += rd;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = (u32)res;
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
cpu->AddCycles_CI(1); // TODO: interlock??
}
@ -1067,7 +1173,8 @@ void A_CLZ(ARM* cpu)
val |= 0x1;
}
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1);
else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C();
}
@ -1085,7 +1192,10 @@ void A_QADD(ARM* cpu)
cpu->CPSR |= 0x08000000;
}
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
// all saturated math instructions fail writes to r15
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
}
@ -1102,8 +1212,10 @@ void A_QSUB(ARM* cpu)
res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
cpu->CPSR |= 0x08000000;
}
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
}
@ -1128,8 +1240,10 @@ void A_QDADD(ARM* cpu)
res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
cpu->CPSR |= 0x08000000;
}
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
}
@ -1154,8 +1268,10 @@ void A_QDSUB(ARM* cpu)
res = (res & 0x80000000) ? 0x7FFFFFFF : 0x80000000;
cpu->CPSR |= 0x08000000;
}
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
}
@ -1460,18 +1576,18 @@ void T_MUL_REG(ARM* cpu)
cpu->SetNZ(res & 0x80000000,
!res);
s32 cycles = 0;
s32 cycles;
if (cpu->Num == 0)
{
cycles += 3;
cycles = 3;
}
else
{
cpu->SetC(0); // carry flag destroyed, they say. whatever that means...
if (a & 0xFF000000) cycles += 4;
else if (a & 0x00FF0000) cycles += 3;
else if (a & 0x0000FF00) cycles += 2;
else cycles += 1;
if ((a & 0xFFFFFF00) == 0x00000000 || (a & 0xFFFFFF00) == 0xFFFFFF00) cycles = 1;
else if ((a & 0xFFFF0000) == 0x00000000 || (a & 0xFFFF0000) == 0xFFFF0000) cycles = 2;
else if ((a & 0xFF000000) == 0x00000000 || (a & 0xFF000000) == 0xFF000000) cycles = 3;
else cycles = 4;
cpu->SetC(MULSCarry(b, a, 0, cycles==4)); // carry flag destroyed, they say. whatever that means...
}
cpu->AddCycles_CI(cycles);
}
@ -1534,6 +1650,18 @@ void T_CMP_HIREG(ARM* cpu)
!res,
CarrySub(a, b),
OverflowSub(a, b));
if ((cpu->Num == 1) && (rd == 15))
{
u32 oldpsr = cpu->CPSR;
cpu->RestoreCPSR(); // ARM7TDMI restores cpsr and does ___not___ flush the pipeline.
if (!(cpu->CPSR & 0x20))
{
Platform::Log(Platform::LogLevel::Warn, "UNIMPLEMENTED: MSR REG T bit change on ARM7\n");
cpu->CPSR |= 0x20; // keep it from crashing the emulator at least
}
}
cpu->AddCycles_C();
}

View File

@ -104,6 +104,9 @@ void T_BL_LONG_1(ARM* cpu)
void T_BL_LONG_2(ARM* cpu)
{
if ((cpu->CurInstr & 0x1801) == 0x0801) // "BLX" with bit 0 set is an undefined instruction.
return T_UNK(cpu); // TODO: Check ARM7 for exceptions
s32 offset = (cpu->CurInstr & 0x7FF) << 1;
u32 pc = cpu->R[14] + offset;
cpu->R[14] = (cpu->R[15] - 2) | 1;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,136 @@
#ifndef ARMINTERPRETER_MULTIPLYSUPERLLE_H
#define ARMINTERPRETER_MULTIPLYSUPERLLE_H
#include "types.h"
using namespace melonDS;
/*
Copyright (c) 2024 zaydlang
This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software.
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
// code taken from: (also features a few alternative implementations that could maybe be worth looking at?)
// https://github.com/calc84maniac/multiplication-algorithm/blob/master/impl_opt.h
// based on research that can be found here: https://bmchtech.github.io/post/multiply/
// the code in this file is dedicated to handling the calculation of the carry flag for multiplication (S variant) instructions on the ARM7TDMI.
// Computes the carry produced by a 32-bit multiply(-accumulate), by stepping a
// Booth-recoded multiplication through a carry-save adder model and returning
// bit 31 of the final carry word.
// Takes a multiplier between -0x01000000 and 0x00FFFFFF, cycles between 0 and 2
//   multiplicand - value multiplied by each Booth factor
//   multiplier   - value that is Booth-recoded chunk by chunk
//   accumulator  - initial value of the carry-save output (0 for a plain multiply)
static inline bool booths_multiplication32_opt(u32 multiplicand, u32 multiplier, u32 accumulator) {
    // Forcing the low bit makes a negated multiplicand invert its upper bits;
    // that bit can't propagate to bit 31.
    const u32 oddMultiplicand = multiplicand | 1;

    // First iteration, folded in: the lowest multiplier bit sign-extended (0 or -1).
    u32 prevBooth = (s32)(multiplier << 31) >> 31;
    u32 csaCarry = prevBooth * oddMultiplicand;

    // The accumulator seeds the carry-save output.
    u32 csaOutput = accumulator;
    u32 runningSum = csaOutput + csaCarry;

    int shift = 29;
    for (;;) {
        // Booth chunks are consumed four at a time, two more multiplier bits per chunk.
        for (int remaining = 4; remaining > 0; --remaining) {
            // Sign-extended low (32 - shift) bits of the multiplier; the difference
            // from the previous value is this chunk's Booth factor (-2 to 2, scaled).
            const u32 curBooth = (s32)(multiplier << shift) >> shift;
            const u32 addend = oddMultiplicand * (curBooth - prevBooth);
            prevBooth = curBooth;
            shift -= 2;

            // Fold the addend into the CSA. No masking is applied because the
            // lower carries can't propagate to bit 31.
            csaOutput ^= csaCarry ^ addend;
            runningSum += addend;
            csaCarry = runningSum - csaOutput;
        }

        // Finished once the sign-extended prefix reproduces the whole multiplier.
        if (prevBooth == multiplier) break;
    }

    return (csaCarry >> 31) != 0;
}
// Carry-out evaluation for the last cycle of a 64-bit multiply.
// Expects the multiplicand already shifted right by 6, the multiplier already shifted right
// by 26 (zero or sign extended), and the upper 32 bits of the accumulator.
// Returns bit 31 of (sum ^ output) after the final Booth steps, i.e. the carry at bit 63.
static inline bool booths_multiplication64_opt(u32 multiplicand, u32 multiplier, u32 accum_hi) {
    // The first 14 iterations are skipped: the lower carries can't propagate to bit 63, so
    // only magic bits 62-61 (which need three decoded booth chunks) and the last two booth
    // iterations matter.

    // Forcing bit 0 makes negation invert the upper bits
    const u32 odd_multiplicand = multiplicand | 1;

    // Sign-extended 1-, 3- and 5-bit prefixes of the multiplier
    const u32 prefix1 = (s32)(multiplier << 31) >> 31;
    const u32 prefix3 = (s32)(multiplier << 29) >> 29;
    const u32 prefix5 = (s32)(multiplier << 27) >> 27;

    // Scaled addends of the top three booth chunks (1st = topmost)
    const u32 addend_1st = odd_multiplicand * (multiplier - prefix5);
    const u32 addend_2nd = odd_multiplicand * (prefix5 - prefix3);
    const u32 addend_3rd = odd_multiplicand * (prefix3 - prefix1);

    // Magic bit 61 for the carry word
    const u32 carry_magic = ~accum_hi & UINT32_C(0x20000000);

    // Magic bits 63-60 for the output word, with the carry magic pre-added in
    u32 output_magic = accum_hi - UINT32_C(0x08000000);
    // The sign of the 3rd addend settles bits 61-60 ...
    output_magic -= addend_3rd & UINT32_C(0x10000000);
    // ... and the sign of the 2nd addend settles bits 63-62
    output_magic -= addend_2nd & UINT32_C(0x40000000);

    // CSA carry out of bit 61 propagated into bit 62 (not processed in that iteration),
    // followed by the 1st addend's contribution to bit 62 with its carry propagated
    const u32 sum = output_magic
                  + (addend_2nd & UINT32_C(0x20000000))
                  + (addend_1st & UINT32_C(0x40000000));

    // Remove the carry magic again to obtain the real output magic
    output_magic -= carry_magic;

    // XOR cancels output magic bit 63 and leaves carry bit 63
    return ((sum ^ output_magic) >> 31) != 0;
}
// Carry flag for MULS; also used for MLAS and MUL (thumb ver.)
// rm: multiplicand, rs: multiplier, rn: accumulator handed to the 32-bit Booth helper.
// When lastcycle is set the helper is bypassed and the carry is simply whether the top two
// bits of rs are 0b10; otherwise the carry comes from booths_multiplication32_opt.
inline bool MULSCarry(s32 rm, s32 rs, u32 rn, bool lastcycle)
{
    return lastcycle ? ((rs >> 30) == -2)
                     : booths_multiplication32_opt(rm, rs, rn);
}
// Carry flag for UMULLS; also used for UMLALS
// rd: 64-bit accumulator, rm: multiplicand, rs: multiplier (both treated as unsigned).
// Without lastcycle the 32-bit Booth helper runs on the low half of rd; with lastcycle the
// 64-bit helper runs on the pre-shifted operands (rm >> 6, rs >> 26) and the high half of rd.
inline bool UMULLSCarry(u64 rd, u32 rm, u32 rs, bool lastcycle)
{
    if (!lastcycle)
        return booths_multiplication32_opt(rm, rs, (u32)(rd & 0xFFFFFFFF));

    return booths_multiplication64_opt(rm >> 6, rs >> 26, (u32)(rd >> 32));
}
// Carry flag for SMULLS; also used for SMLALS
// rd: 64-bit accumulator, rm: multiplicand, rs: multiplier (both signed, so the pre-shifts
// applied for the 64-bit helper are performed on signed values).
// Without lastcycle the 32-bit Booth helper runs on the low half of rd; with lastcycle the
// 64-bit helper runs on rm >> 6, rs >> 26 and the high half of rd.
inline bool SMULLSCarry(u64 rd, s32 rm, s32 rs, bool lastcycle)
{
    if (!lastcycle)
        return booths_multiplication32_opt(rm, rs, (u32)(rd & 0xFFFFFFFF));

    const s32 shifted_rm = rm >> 6;
    const s32 shifted_rs = rs >> 26;
    return booths_multiplication64_opt(shifted_rm, shifted_rs, (u32)(rd >> 32));
}
#endif

View File

@ -588,7 +588,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept
u32 numWriteAddrs = 0, writeAddrsTranslated = 0;
cpu->FillPipeline();
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
u32 nextInstr[2] = {(u32)cpu->NextInstr[0], (u32)cpu->NextInstr[1]};
u32 nextInstrAddr[2] = {blockAddr, r15};
JIT_DEBUGPRINT("start block %x %08x (%x)\n", blockAddr, cpu->CPSR, localAddr);

View File

@ -194,6 +194,7 @@ const u32 A_BX = A_BranchAlways | A_Read0 | ak(ak_BX);
const u32 A_BLX_REG = A_BranchAlways | A_Link | A_Read0 | ak(ak_BLX_REG);
const u32 A_UNK = A_BranchAlways | A_Link | ak(ak_UNK);
const u32 A_BKPT = A_BranchAlways | A_Link | ak(ak_UNK);
const u32 A_MSR_IMM = ak(ak_MSR_IMM);
const u32 A_MSR_REG = A_Read0 | ak(ak_MSR_REG);
const u32 A_MRS = A_Write12 | ak(ak_MRS);

View File

@ -130,7 +130,7 @@ INSTRFUNC_PROTO(ARMInstrTable[4096]) =
// 0001 0010 0000
A_MSR_REG, A_BX, A_UNK, A_BLX_REG,
A_UNK, A_QSUB, A_UNK, A_UNK,
A_UNK, A_QSUB, A_UNK, A_BKPT,
A_SMLAWy, A_UNK, A_SMULWy, A_STRH_REG,
A_SMLAWy, A_LDRD_REG, A_SMULWy, A_STRD_REG,

View File

@ -266,8 +266,6 @@ void ARMv5::UpdatePURegions(bool update_all)
// PU disabled
u8 mask = 0x07;
if (CP15Control & (1<<2)) mask |= 0x30;
if (CP15Control & (1<<12)) mask |= 0x40;
memset(PU_UserMap, mask, 0x100000);
memset(PU_PrivMap, mask, 0x100000);
@ -579,7 +577,7 @@ void ARMv5::CP15Write(u32 id, u32 val)
case 0x670:
case 0x671:
char log_output[1024];
PU_Region[(id >> 4) & 0xF] = val;
PU_Region[(id >> 4) & 0xF] = val & ~(0x3F<<6);
std::snprintf(log_output,
sizeof(log_output),
@ -773,16 +771,15 @@ u32 ARMv5::CP15Read(u32 id) const
// TCM are handled here.
// TODO: later on, handle PU, and maybe caches
u32 ARMv5::CodeRead32(u32 addr, bool branch)
u64 ARMv5::CodeRead32(u32 addr, bool branch)
{
/*if (branch || (!(addr & 0xFFF)))
// prefetch abort
// the actual exception is not raised until the aborted instruction is executed
if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]]
{
if (!(PU_Map[addr>>12] & 0x04))
{
PrefetchAbort();
return 0;
}
}*/
CodeCycles = 1;
return ((u64)1<<63);
}
if (addr < ITCMSize)
{
@ -807,150 +804,163 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
}
void ARMv5::DataRead8(u32 addr, u32* val)
bool ARMv5::DataRead8(u32 addr, u32* val)
{
if (!(PU_Map[addr>>12] & 0x01))
// Data Aborts
// Exception is handled in the actual instruction implementation
if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]]
{
DataAbort();
return;
DataCycles = 1;
return false;
}
DataRegion = addr;
if (addr < ITCMSize)
{
DataCycles = 1;
*val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataCycles = 1;
*val = *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)];
return;
return true;
}
*val = BusRead8(addr);
DataCycles = MemTimings[addr >> 12][1];
return true;
}
void ARMv5::DataRead16(u32 addr, u32* val)
bool ARMv5::DataRead16(u32 addr, u32* val)
{
if (!(PU_Map[addr>>12] & 0x01))
// Data Aborts
// Exception is handled in the actual instruction implementation
if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]]
{
DataAbort();
return;
DataCycles = 1;
return false;
}
DataRegion = addr;
addr &= ~1;
if (addr < ITCMSize)
{
DataCycles = 1;
*val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataCycles = 1;
*val = *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)];
return;
return true;
}
*val = BusRead16(addr);
DataCycles = MemTimings[addr >> 12][1];
return true;
}
void ARMv5::DataRead32(u32 addr, u32* val)
bool ARMv5::DataRead32(u32 addr, u32* val)
{
if (!(PU_Map[addr>>12] & 0x01))
// Data Aborts
// Exception is handled in the actual instruction implementation
if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]]
{
DataAbort();
return;
DataCycles = 1;
return false;
}
DataRegion = addr;
addr &= ~3;
if (addr < ITCMSize)
{
DataCycles = 1;
*val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataCycles = 1;
*val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)];
return;
return true;
}
*val = BusRead32(addr);
DataCycles = MemTimings[addr >> 12][2];
return true;
}
void ARMv5::DataRead32S(u32 addr, u32* val)
bool ARMv5::DataRead32S(u32 addr, u32* val)
{
// Data Aborts
// Exception is handled in the actual instruction implementation
if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]]
{
DataCycles += 1;
return false;
}
addr &= ~3;
if (addr < ITCMSize)
{
DataCycles += 1;
*val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataCycles += 1;
*val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)];
return;
return true;
}
*val = BusRead32(addr);
DataCycles += MemTimings[addr >> 12][3];
return true;
}
void ARMv5::DataWrite8(u32 addr, u8 val)
bool ARMv5::DataWrite8(u32 addr, u8 val)
{
if (!(PU_Map[addr>>12] & 0x02))
// Data Aborts
// Exception is handled in the actual instruction implementation
if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]]
{
DataAbort();
return;
DataCycles = 1;
return false;
}
DataRegion = addr;
if (addr < ITCMSize)
{
DataCycles = 1;
*(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
return;
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataCycles = 1;
*(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val;
return;
return true;
}
BusWrite8(addr, val);
DataCycles = MemTimings[addr >> 12][1];
return true;
}
void ARMv5::DataWrite16(u32 addr, u16 val)
bool ARMv5::DataWrite16(u32 addr, u16 val)
{
if (!(PU_Map[addr>>12] & 0x02))
// Data Aborts
// Exception is handled in the actual instruction implementation
if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]]
{
DataAbort();
return;
DataCycles = 1;
return false;
}
DataRegion = addr;
addr &= ~1;
if (addr < ITCMSize)
@ -958,29 +968,30 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
DataCycles = 1;
*(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
return;
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataCycles = 1;
*(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val;
return;
return true;
}
BusWrite16(addr, val);
DataCycles = MemTimings[addr >> 12][1];
return true;
}
void ARMv5::DataWrite32(u32 addr, u32 val)
bool ARMv5::DataWrite32(u32 addr, u32 val)
{
if (!(PU_Map[addr>>12] & 0x02))
// Data Aborts
// Exception is handled in the actual instruction implementation
if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]]
{
DataAbort();
return;
DataCycles = 1;
return false;
}
DataRegion = addr;
addr &= ~3;
if (addr < ITCMSize)
@ -988,21 +999,30 @@ void ARMv5::DataWrite32(u32 addr, u32 val)
DataCycles = 1;
*(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
return;
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataCycles = 1;
*(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val;
return;
return true;
}
BusWrite32(addr, val);
DataCycles = MemTimings[addr >> 12][2];
return true;
}
void ARMv5::DataWrite32S(u32 addr, u32 val)
bool ARMv5::DataWrite32S(u32 addr, u32 val)
{
// Data Aborts
// Exception is handled in the actual instruction implementation
if (!(PU_Map[addr>>12] & 0x02)) [[unlikely]]
{
DataCycles += 1;
return false;
}
addr &= ~3;
if (addr < ITCMSize)
@ -1012,17 +1032,18 @@ void ARMv5::DataWrite32S(u32 addr, u32 val)
#ifdef JIT_ENABLED
NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataCycles += 1;
*(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val;
return;
return true;
}
BusWrite32(addr, val);
DataCycles += MemTimings[addr >> 12][3];
return true;
}
void ARMv5::GetCodeMemRegion(u32 addr, MemRegion* region)

View File

@ -162,6 +162,7 @@ void DSi::Reset()
SCFG_Clock9 = 0x0187; // CHECKME
SCFG_Clock7 = 0x0187;
SCFG_EXT[0] = 0x8307F100;
SetVRAMTimings(true);
SCFG_EXT[1] = 0x93FFFB06;
SCFG_MC = 0x0010 | (~((u32)(NDSCartSlot.GetCart() != nullptr))&1);//0x0011;
SCFG_RST = 0;
@ -235,6 +236,7 @@ void DSi::DoSavestateExtra(Savestate* file)
Set_SCFG_Clock9(SCFG_Clock9);
Set_SCFG_MC(SCFG_MC);
DSP.SetRstLine(SCFG_RST & 0x0001);
SetVRAMTimings(SCFG_EXT[0] & (1<<13));
MBK[0][8] = 0;
MBK[1][8] = 0;
@ -713,6 +715,7 @@ void DSi::SoftReset()
SCFG_Clock9 = 0x0187; // CHECKME
SCFG_Clock7 = 0x0187;
SCFG_EXT[0] = 0x8307F100;
SetVRAMTimings(true);
SCFG_EXT[1] = 0x93FFFB06;
SCFG_MC = 0x0010;//0x0011;
// TODO: is this actually reset?
@ -1303,6 +1306,14 @@ void DSi::Set_SCFG_MC(u32 val)
}
}
// Re-registers the ARM9 timings for the 0x06000-0x07000 range (Mem9_VRAM) with a bus width
// of 32 when extrabuswidth is set (dsi vram) or 16 otherwise (ds vram).
void DSi::SetVRAMTimings(bool extrabuswidth)
{
    SetARM9RegionTimings(0x06000, 0x07000, Mem9_VRAM, extrabuswidth ? 32 : 16, 1, 1);
}
u8 DSi::ARM9Read8(u32 addr)
{
@ -2541,11 +2552,18 @@ void DSi::ARM9IOWrite32(u32 addr, u32 val)
u32 oldram = (SCFG_EXT[0] >> 14) & 0x3;
u32 newram = (val >> 14) & 0x3;
u32 oldvram = (SCFG_EXT[0] & (1<<13));
u32 newvram = (val & (1<<13));
SCFG_EXT[0] &= ~0x8007F19F;
SCFG_EXT[0] |= (val & 0x8007F19F);
SCFG_EXT[1] &= ~0x0000F080;
SCFG_EXT[1] |= (val & 0x0000F080);
Log(LogLevel::Debug, "SCFG_EXT = %08X / %08X (val9 %08X)\n", SCFG_EXT[0], SCFG_EXT[1], val);
if (oldvram != newvram)
SetVRAMTimings(newvram);
/*switch ((SCFG_EXT[0] >> 14) & 0x3)
{
case 0:

View File

@ -96,6 +96,7 @@ public:
void MapNWRAM_B(u32 num, u8 val);
void MapNWRAM_C(u32 num, u8 val);
void MapNWRAMRange(u32 cpu, u32 num, u32 val);
void SetVRAMTimings(bool extrabuswidth);
u8 ARM9Read8(u32 addr) override;
u16 ARM9Read16(u32 addr) override;