improve ldm timings

I believe this also applies to other loads, but that is currently untested.
This commit is contained in:
Jaklyy 2024-06-24 19:44:38 -04:00
parent 3583d8222f
commit 109bbed3d0
3 changed files with 67 additions and 20 deletions

View File

@ -302,6 +302,10 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
u32 oldregion = R[15] >> 24;
u32 newregion = addr >> 24;
if (addr < ITCMSize) CodeRegion = Mem9_ITCM;
else if ((addr & DTCMMask) == DTCMBase) CodeRegion = Mem9_DTCM;
else CodeRegion = NDS.ARM9Regions[addr >> 14];
RegionCodeCycles = MemTimings[addr >> 12][0];
@ -1255,6 +1259,57 @@ bool ARMv4::DataWrite32S(u32 addr, u32 val, bool dataabort)
}
void ARMv5::AddCycles_CDI()
{
// LDR/LDM cycles. ARM9 seems to skip the internal cycle there.
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
s32 numD = DataCycles;
// if a 32 bit bus, start 2 cycles early; else, start 4 cycles early
s32 early;
switch (DataRegion)
{
case 0: // background region; CHECKME
case Mem9_DTCM:
case Mem9_BIOS:
case Mem9_WRAM:
case Mem9_IO:
case Mem9_Pal: // CHECKME
default:
early = 2;
break;
case Mem9_OAM: // CHECKME
case Mem9_GBAROM:
case Mem9_GBARAM:
early = 4;
break;
case Mem9_MainRAM:
early = (CodeRegion == Mem9_MainRAM) ? 0 : 4;
break;
case Mem9_VRAM: // the dsi can toggle the bus width of vram between 32 and 16 bit
early = (NDS.ConsoleType == 0 || !(((DSi&)NDS).SCFG_EXT[0] & (1<<13))) ? 4 : 2;
break;
case Mem9_ITCM: // itcm data fetches cannot be done at the same time as a code fetch, it'll even incurr a 1 cycle penalty when executing from itcm
early = (CodeRegion == Mem9_ITCM) ? -1 : 0;
break;
}
if (numD > early)
{
numC -= early;
if (numC < 0) numC = 0;
Cycles += numC + numD;
}
else
{
Cycles += numC;
}
}
void ARMv4::AddCycles_C()
{
// code only. this code fetch is sequential.

View File

@ -325,18 +325,7 @@ public:
Cycles += numC + numI;
}
void AddCycles_CDI() override
{
// LDR/LDM cycles. ARM9 seems to skip the internal cycle there.
// TODO: ITCM data fetches shouldn't be parallelized, they say
s32 numC = (R[15] & 0x2) ? 0 : CodeCycles;
s32 numD = DataCycles;
//if (DataRegion != CodeRegion)
Cycles += std::max(numC + numD - 6, std::max(numC, numD));
//else
// Cycles += numC + numD;
}
void AddCycles_CDI() override;
void AddCycles_CD() override
{

View File

@ -815,22 +815,23 @@ bool ARMv5::DataRead8(u32 addr, u32* val)
return false;
}
DataRegion = addr;
if (addr < ITCMSize)
{
DataRegion = Mem9_ITCM;
DataCycles = 1;
*val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataRegion = Mem9_DTCM;
DataCycles = 1;
*val = *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)];
return true;
}
*val = BusRead8(addr);
DataRegion = NDS.ARM9Regions[addr >> 14];
DataCycles = MemTimings[addr >> 12][1];
return true;
}
@ -843,24 +844,25 @@ bool ARMv5::DataRead16(u32 addr, u32* val)
return false;
}
DataRegion = addr;
addr &= ~1;
if (addr < ITCMSize)
{
DataRegion = Mem9_ITCM;
DataCycles = 1;
*val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataRegion = Mem9_DTCM;
DataCycles = 1;
*val = *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)];
return true;
}
*val = BusRead16(addr);
DataRegion = NDS.ARM9Regions[addr >> 14];
DataCycles = MemTimings[addr >> 12][1];
return true;
}
@ -873,24 +875,25 @@ bool ARMv5::DataRead32(u32 addr, u32* val)
return false;
}
DataRegion = addr;
addr &= ~3;
if (addr < ITCMSize)
{
DataRegion = Mem9_ITCM;
DataCycles = 1;
*val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return true;
}
if ((addr & DTCMMask) == DTCMBase)
{
DataRegion = Mem9_DTCM;
DataCycles = 1;
*val = *(u32*)&DTCM[addr & (DTCMPhysicalSize - 1)];
return true;
}
*val = BusRead32(addr);
DataRegion = NDS.ARM9Regions[addr >> 14];
DataCycles = MemTimings[addr >> 12][2];
return true;
}