Mirror of https://github.com/melonDS-emu/melonDS.git (synced 2025-07-23 06:10:03 -06:00)
new block cache and much more...
- more reliable code invalidation detection
- blocks aren't stopped at any branch, but are followed where possible to get larger blocks
- idle loop recognition
- optimised literal loads, load/store cycle counting, and loads/stores from constant addresses
44  src/ARM.cpp
@@ -623,21 +623,26 @@ void ARMv5::ExecuteJIT()
             return;
         }
 
-        ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock<0>(instrAddr);
-        Cycles += (block ? block : ARMJIT::CompileBlock(this))();
+        ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock<0>(instrAddr);
+        if (block)
+            Cycles += block();
+        else
+            ARMJIT::CompileBlock(this);
+
+        NDS::ARM9Timestamp += Cycles;
+        Cycles = 0;
 
+        if (IRQ) TriggerIRQ();
         if (Halted)
         {
-            if (Halted == 1 && NDS::ARM9Timestamp < NDS::ARM9Target)
+            bool idleLoop = Halted & 0x20;
+            Halted &= ~0x20;
+            if ((Halted == 1 || idleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target)
             {
                 NDS::ARM9Timestamp = NDS::ARM9Target;
             }
             break;
         }
-        if (IRQ) TriggerIRQ();
-
-        NDS::ARM9Timestamp += Cycles;
-        Cycles = 0;
     }
 
     if (Halted == 2)
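For context, a minimal sketch (hypothetical names, not melonDS's actual loop) of the dispatch pattern both ExecuteJIT variants now follow: run or compile a block, account its cycles, then use the extra Halted bit (0x20) that compiled blocks set when they hit a recognised idle loop to fast-forward time to the next scheduled event.

#include <cstdint>

// Stubs standing in for the real lookup/compile machinery (placeholders).
using BlockEntry = uint32_t (*)();                      // returns cycles spent
static BlockEntry LookUp(uint32_t /*addr*/) { return nullptr; }
static void       Compile(uint32_t /*addr*/) {}

// One iteration of the run loop, assuming a core whose Halted flag uses
// bit 5 (0x20) to mean "the last block ended in a detected idle loop".
static void RunOnce(uint32_t pc, uint32_t& halted,
                    uint64_t& timestamp, uint64_t target)
{
    uint32_t cycles = 0;
    if (BlockEntry block = LookUp(pc))
        cycles += block();              // execute the cached block
    else
        Compile(pc);                    // not cached yet: compile it for later

    timestamp += cycles;

    if (halted)
    {
        bool idleLoop = halted & 0x20;
        halted &= ~0x20;
        // A real halt (1) or a detected idle loop both mean nothing useful
        // happens until the next scheduled event, so jump straight to it.
        if ((halted == 1 || idleLoop) && timestamp < target)
            timestamp = target;
    }
}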
@@ -753,23 +758,28 @@ void ARMv4::ExecuteJIT()
             printf("ARMv4 PC in non executable region %08X\n", R[15]);
             return;
         }
 
-        ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock<1>(instrAddr);
-        Cycles += (block ? block : ARMJIT::CompileBlock(this))();
+        ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock<1>(instrAddr);
+        if (block)
+            Cycles += block();
+        else
+            ARMJIT::CompileBlock(this);
+
+        NDS::ARM7Timestamp += Cycles;
+        Cycles = 0;
 
         // TODO optimize this shit!!!
+        if (IRQ) TriggerIRQ();
         if (Halted)
         {
-            if (Halted == 1 && NDS::ARM7Timestamp < NDS::ARM7Target)
+            bool idleLoop = Halted & 0x20;
+            Halted &= ~0x20;
+            if ((Halted == 1 || idleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target)
            {
                 NDS::ARM7Timestamp = NDS::ARM7Target;
             }
             break;
         }
 
-        if (IRQ) TriggerIRQ();
-
-        NDS::ARM7Timestamp += Cycles;
-        Cycles = 0;
     }
 
     if (Halted == 2)
@@ -779,6 +789,8 @@ void ARMv4::ExecuteJIT()
 
 void ARMv5::FillPipeline()
 {
+    SetupCodeMem(R[15]);
+
     if (CPSR & 0x20)
     {
         if ((R[15] - 2) & 0x2)
@@ -801,6 +813,8 @@ void ARMv5::FillPipeline()
 
 void ARMv4::FillPipeline()
 {
+    SetupCodeMem(R[15]);
+
     if (CPSR & 0x20)
     {
         NextInstr[0] = CodeRead16(R[15] - 2);
16  src/ARM.h
@@ -311,7 +311,7 @@ public:
     {
         *val = BusRead8(addr);
         DataRegion = addr >> 24;
-        DataCycles = NDS::ARM7MemTimings[DataRegion][0];
+        DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
     }
 
     void DataRead16(u32 addr, u32* val)
@@ -320,7 +320,7 @@ public:
 
         *val = BusRead16(addr);
         DataRegion = addr >> 24;
-        DataCycles = NDS::ARM7MemTimings[DataRegion][0];
+        DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
     }
 
     void DataRead32(u32 addr, u32* val)
@@ -329,7 +329,7 @@ public:
 
         *val = BusRead32(addr);
         DataRegion = addr >> 24;
-        DataCycles = NDS::ARM7MemTimings[DataRegion][2];
+        DataCycles = NDS::ARM7MemTimings[addr >> 15][2];
     }
 
     void DataRead32S(u32 addr, u32* val)
@@ -337,14 +337,14 @@ public:
         addr &= ~3;
 
         *val = BusRead32(addr);
-        DataCycles += NDS::ARM7MemTimings[DataRegion][3];
+        DataCycles += NDS::ARM7MemTimings[addr >> 15][3];
     }
 
     void DataWrite8(u32 addr, u8 val)
     {
         BusWrite8(addr, val);
         DataRegion = addr >> 24;
-        DataCycles = NDS::ARM7MemTimings[DataRegion][0];
+        DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
     }
 
     void DataWrite16(u32 addr, u16 val)
@@ -353,7 +353,7 @@ public:
 
         BusWrite16(addr, val);
         DataRegion = addr >> 24;
-        DataCycles = NDS::ARM7MemTimings[DataRegion][0];
+        DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
     }
 
     void DataWrite32(u32 addr, u32 val)
@@ -362,7 +362,7 @@ public:
 
         BusWrite32(addr, val);
         DataRegion = addr >> 24;
-        DataCycles = NDS::ARM7MemTimings[DataRegion][2];
+        DataCycles = NDS::ARM7MemTimings[addr >> 15][2];
     }
 
     void DataWrite32S(u32 addr, u32 val)
@@ -370,7 +370,7 @@ public:
         addr &= ~3;
 
         BusWrite32(addr, val);
-        DataCycles += NDS::ARM7MemTimings[DataRegion][3];
+        DataCycles += NDS::ARM7MemTimings[addr >> 15][3];
     }
 
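The change above swaps the timing-table index from the region byte (addr >> 24) to addr >> 15, i.e. one entry per 32 KB, which can tell apart banks that share the same top address byte. A tiny sketch of the lookup, assuming a table laid out the way that index implies (the exact layout of NDS::ARM7MemTimings is not shown here):

#include <cstdint>

// Assumed layout: one row of bus timings per 32 KB slice; column 0 is used
// for 8/16-bit accesses in the diff above, columns 2/3 for 32-bit ones.
static uint8_t MemTimings[0x20000][4];

static uint32_t DataCycles16(uint32_t addr)
{
    // addr >> 24 would give one row per 16 MB region, which is too coarse;
    // addr >> 15 resolves individual 32 KB banks within a region.
    return MemTimings[addr >> 15][0];
}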
@@ -28,6 +28,15 @@ namespace ARMInterpreter
 extern void (*ARMInstrTable[4096])(ARM* cpu);
 extern void (*THUMBInstrTable[1024])(ARM* cpu);
 
+void A_MSR_IMM(ARM* cpu);
+void A_MSR_REG(ARM* cpu);
+void A_MRS(ARM* cpu);
+void A_MCR(ARM* cpu);
+void A_MRC(ARM* cpu);
+void A_SVC(ARM* cpu);
+
+void T_SVC(ARM* cpu);
+
 void A_BLX_IMM(ARM* cpu); // I'm a special one look at me
 
 }
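These interpreter entry points are exported because the new JIT (in src/ARMJIT.cpp further down) builds function-pointer tables over them and steps instructions through the interpreter while it decodes a block, or when an opcode can't be compiled. A minimal sketch of that dispatch pattern, with hypothetical types and a two-entry table:

#include <cstdint>
#include <cassert>

struct CPU { uint32_t CurInstr; };              // toy CPU state
using InterpreterFunc = void (*)(CPU* cpu);

// Placeholder bodies; the real routines live in the interpreter sources.
static void Interp_MSR_IMM(CPU*) {}
static void Interp_MRC(CPU*)     {}

enum InstrKind { kind_MSR_IMM, kind_MRC, kind_Count };

static InterpreterFunc InterpTable[kind_Count] = { Interp_MSR_IMM, Interp_MRC };

// Either emit native code for the instruction or run the interpreter routine.
static void CompileOrFallback(CPU* cpu, InstrKind kind, bool canCompile)
{
    if (canCompile)
        return;                                 // normal JIT path (not shown)
    assert(InterpTable[kind]);
    InterpTable[kind](cpu);                     // interpreter fallback
}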
757  src/ARMJIT.cpp
@ -1,122 +1,137 @@
|
|||||||
#include "ARMJIT.h"
|
#include "ARMJIT.h"
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
#include "Config.h"
|
#include "Config.h"
|
||||||
|
|
||||||
|
#include "ARMJIT_Internal.h"
|
||||||
#include "ARMJIT_x64/ARMJIT_Compiler.h"
|
#include "ARMJIT_x64/ARMJIT_Compiler.h"
|
||||||
|
|
||||||
|
#include "ARMInterpreter_ALU.h"
|
||||||
|
#include "ARMInterpreter_LoadStore.h"
|
||||||
|
#include "ARMInterpreter_Branch.h"
|
||||||
|
#include "ARMInterpreter.h"
|
||||||
|
|
||||||
|
#include "GPU3D.h"
|
||||||
|
#include "SPU.h"
|
||||||
|
#include "Wifi.h"
|
||||||
|
|
||||||
namespace ARMJIT
|
namespace ARMJIT
|
||||||
{
|
{
|
||||||
|
|
||||||
|
#define JIT_DEBUGPRINT(msg, ...)
|
||||||
|
|
||||||
Compiler* compiler;
|
Compiler* compiler;
|
||||||
BlockCache cache;
|
|
||||||
|
const u32 ExeMemRegionSizes[] = {
|
||||||
|
0x8000, // Unmapped Region (dummy)
|
||||||
|
0x8000, // ITCM
|
||||||
|
4*1024*1024, // Main RAM
|
||||||
|
0x8000, // SWRAM
|
||||||
|
0xA4000, // LCDC
|
||||||
|
0x8000, // ARM9 BIOS
|
||||||
|
0x4000, // ARM7 BIOS
|
||||||
|
0x10000, // ARM7 WRAM
|
||||||
|
0x40000 // ARM7 WVRAM
|
||||||
|
};
|
||||||
|
|
||||||
|
const u32 ExeMemRegionOffsets[] = {
|
||||||
|
0,
|
||||||
|
0x8000,
|
||||||
|
0x10000,
|
||||||
|
0x410000,
|
||||||
|
0x418000,
|
||||||
|
0x4BC000,
|
||||||
|
0x4C4000,
|
||||||
|
0x4C8000,
|
||||||
|
0x4D8000,
|
||||||
|
0x518000,
|
||||||
|
};
|
||||||
|
|
||||||
#define DUP2(x) x, x
|
#define DUP2(x) x, x
|
||||||
|
|
||||||
static ptrdiff_t JIT_MEM[2][32] = {
|
const static ExeMemKind JIT_MEM[2][32] = {
|
||||||
//arm9
|
//arm9
|
||||||
{
|
{
|
||||||
/* 0X*/ DUP2(offsetof(BlockCache, ARM9_ITCM)),
|
/* 0X*/ DUP2(exeMem_ITCM),
|
||||||
/* 1X*/ DUP2(offsetof(BlockCache, ARM9_ITCM)), // mirror
|
/* 1X*/ DUP2(exeMem_ITCM), // mirror
|
||||||
/* 2X*/ DUP2(offsetof(BlockCache, MainRAM)),
|
/* 2X*/ DUP2(exeMem_MainRAM),
|
||||||
/* 3X*/ DUP2(offsetof(BlockCache, SWRAM)),
|
/* 3X*/ DUP2(exeMem_SWRAM),
|
||||||
/* 4X*/ DUP2(-1),
|
/* 4X*/ DUP2(exeMem_Unmapped),
|
||||||
/* 5X*/ DUP2(-1),
|
/* 5X*/ DUP2(exeMem_Unmapped),
|
||||||
/* 6X*/ -1,
|
/* 6X*/ exeMem_Unmapped,
|
||||||
offsetof(BlockCache, ARM9_LCDC), // Plain ARM9-CPU Access (LCDC mode) (max 656KB)
|
exeMem_LCDC, // Plain ARM9-CPU Access (LCDC mode) (max 656KB)
|
||||||
/* 7X*/ DUP2(-1),
|
/* 7X*/ DUP2(exeMem_Unmapped),
|
||||||
/* 8X*/ DUP2(-1),
|
/* 8X*/ DUP2(exeMem_Unmapped),
|
||||||
/* 9X*/ DUP2(-1),
|
/* 9X*/ DUP2(exeMem_Unmapped),
|
||||||
/* AX*/ DUP2(-1),
|
/* AX*/ DUP2(exeMem_Unmapped),
|
||||||
/* BX*/ DUP2(-1),
|
/* BX*/ DUP2(exeMem_Unmapped),
|
||||||
/* CX*/ DUP2(-1),
|
/* CX*/ DUP2(exeMem_Unmapped),
|
||||||
/* DX*/ DUP2(-1),
|
/* DX*/ DUP2(exeMem_Unmapped),
|
||||||
/* EX*/ DUP2(-1),
|
/* EX*/ DUP2(exeMem_Unmapped),
|
||||||
/* FX*/ DUP2(offsetof(BlockCache, ARM9_BIOS))
|
/* FX*/ DUP2(exeMem_ARM9_BIOS)
|
||||||
},
|
},
|
||||||
//arm7
|
//arm7
|
||||||
{
|
{
|
||||||
/* 0X*/ DUP2(offsetof(BlockCache, ARM7_BIOS)),
|
/* 0X*/ DUP2(exeMem_ARM7_BIOS),
|
||||||
/* 1X*/ DUP2(-1),
|
/* 1X*/ DUP2(exeMem_Unmapped),
|
||||||
/* 2X*/ DUP2(offsetof(BlockCache, MainRAM)),
|
/* 2X*/ DUP2(exeMem_MainRAM),
|
||||||
/* 3X*/ offsetof(BlockCache, SWRAM),
|
/* 3X*/ exeMem_SWRAM,
|
||||||
offsetof(BlockCache, ARM7_WRAM),
|
exeMem_ARM7_WRAM,
|
||||||
/* 4X*/ DUP2(-1),
|
/* 4X*/ DUP2(exeMem_Unmapped),
|
||||||
/* 5X*/ DUP2(-1),
|
/* 5X*/ DUP2(exeMem_Unmapped),
|
||||||
/* 6X*/ DUP2(offsetof(BlockCache, ARM7_WVRAM)), /* contrary to Gbatek, melonDS and itself,
|
/* 6X*/ DUP2(exeMem_ARM7_WVRAM), /* contrary to Gbatek, melonDS and itself,
|
||||||
DeSmuME doesn't mirror the 64 MB region at 0x6800000 */
|
DeSmuME doesn't mirror the 64 MB region at 0x6800000 */
|
||||||
/* 7X*/ DUP2(-1),
|
/* 7X*/ DUP2(exeMem_Unmapped),
|
||||||
/* 8X*/ DUP2(-1),
|
/* 8X*/ DUP2(exeMem_Unmapped),
|
||||||
/* 9X*/ DUP2(-1),
|
/* 9X*/ DUP2(exeMem_Unmapped),
|
||||||
/* AX*/ DUP2(-1),
|
/* AX*/ DUP2(exeMem_Unmapped),
|
||||||
/* BX*/ DUP2(-1),
|
/* BX*/ DUP2(exeMem_Unmapped),
|
||||||
/* CX*/ DUP2(-1),
|
/* CX*/ DUP2(exeMem_Unmapped),
|
||||||
/* DX*/ DUP2(-1),
|
/* DX*/ DUP2(exeMem_Unmapped),
|
||||||
/* EX*/ DUP2(-1),
|
/* EX*/ DUP2(exeMem_Unmapped),
|
||||||
/* FX*/ DUP2(-1)
|
/* FX*/ DUP2(exeMem_Unmapped)
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static u32 JIT_MASK[2][32] = {
|
|
||||||
//arm9
|
|
||||||
{
|
|
||||||
/* 0X*/ DUP2(0x00007FFF),
|
|
||||||
/* 1X*/ DUP2(0x00007FFF),
|
|
||||||
/* 2X*/ DUP2(0x003FFFFF),
|
|
||||||
/* 3X*/ DUP2(0x00007FFF),
|
|
||||||
/* 4X*/ DUP2(0x00000000),
|
|
||||||
/* 5X*/ DUP2(0x00000000),
|
|
||||||
/* 6X*/ 0x00000000,
|
|
||||||
0x000FFFFF,
|
|
||||||
/* 7X*/ DUP2(0x00000000),
|
|
||||||
/* 8X*/ DUP2(0x00000000),
|
|
||||||
/* 9X*/ DUP2(0x00000000),
|
|
||||||
/* AX*/ DUP2(0x00000000),
|
|
||||||
/* BX*/ DUP2(0x00000000),
|
|
||||||
/* CX*/ DUP2(0x00000000),
|
|
||||||
/* DX*/ DUP2(0x00000000),
|
|
||||||
/* EX*/ DUP2(0x00000000),
|
|
||||||
/* FX*/ DUP2(0x00007FFF)
|
|
||||||
},
|
|
||||||
//arm7
|
|
||||||
{
|
|
||||||
/* 0X*/ DUP2(0x00003FFF),
|
|
||||||
/* 1X*/ DUP2(0x00000000),
|
|
||||||
/* 2X*/ DUP2(0x003FFFFF),
|
|
||||||
/* 3X*/ 0x00007FFF,
|
|
||||||
0x0000FFFF,
|
|
||||||
/* 4X*/ 0x00000000,
|
|
||||||
0x0000FFFF,
|
|
||||||
/* 5X*/ DUP2(0x00000000),
|
|
||||||
/* 6X*/ DUP2(0x0003FFFF),
|
|
||||||
/* 7X*/ DUP2(0x00000000),
|
|
||||||
/* 8X*/ DUP2(0x00000000),
|
|
||||||
/* 9X*/ DUP2(0x00000000),
|
|
||||||
/* AX*/ DUP2(0x00000000),
|
|
||||||
/* BX*/ DUP2(0x00000000),
|
|
||||||
/* CX*/ DUP2(0x00000000),
|
|
||||||
/* DX*/ DUP2(0x00000000),
|
|
||||||
/* EX*/ DUP2(0x00000000),
|
|
||||||
/* FX*/ DUP2(0x00000000)
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#undef DUP2
|
#undef DUP2
|
||||||
|
|
||||||
+/*
+    translates address to pseudo physical address
+    - more compact, eliminates mirroring, everything comes in a row
+    - we only need one translation table
+*/
+u32 AddrTranslate9[0x2000];
+u32 AddrTranslate7[0x4000];
+
+JitBlockEntry FastBlockAccess[ExeMemSpaceSize / 2];
+AddressRange CodeRanges[ExeMemSpaceSize / 256];
+
+TinyVector<JitBlock*> JitBlocks;
+JitBlock* RestoreCandidates[0x1000] = {NULL};
+
+u32 HashRestoreCandidate(u32 pseudoPhysicalAddr)
+{
+    return (u32)(((u64)pseudoPhysicalAddr * 11400714819323198485llu) >> 53);
+}
+
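Two ideas land here. First, every executable region is packed back-to-back into one "pseudo physical" address space, so a single flat table can be indexed by the translated address and mirrors collapse onto one entry. Second, invalidated blocks are parked in a small hash table of restore candidates, bucketed with a Fibonacci-style hash (the multiplier is the floor of 2^64/phi). A sketch under assumed example region tables:

#include <cstdint>

// Assumed example layout: two executable regions packed back to back in
// pseudo physical space (power-of-two sizes so mirrors fold via masking).
static const uint32_t RegionOffset[] = { 0x0,    0x8000   };
static const uint32_t RegionSize[]   = { 0x8000, 0x400000 };

// Bus address -> pseudo physical address for a given region.
static uint32_t Translate(int region, uint32_t addr)
{
    return RegionOffset[region] + (addr & (RegionSize[region] - 1));
}

// Fibonacci hashing: multiplying by ~2^64/phi scrambles the address, and
// keeping only the top 11 bits of the product spreads nearby addresses
// evenly over a small bucket array.
static uint32_t HashCandidate(uint32_t pseudoPhysicalAddr)
{
    return (uint32_t)(((uint64_t)pseudoPhysicalAddr * 11400714819323198485llu) >> 53);
}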
void Init()
|
void Init()
|
||||||
{
|
{
|
||||||
memset(&cache, 0, sizeof(BlockCache));
|
|
||||||
|
|
||||||
for (int i = 0; i < 0x2000; i++)
|
for (int i = 0; i < 0x2000; i++)
|
||||||
cache.AddrMapping9[i] = JIT_MEM[0][i >> 8] == -1 ? NULL :
|
{
|
||||||
(CompiledBlock*)((u8*)&cache + JIT_MEM[0][i >> 8])
|
ExeMemKind kind = JIT_MEM[0][i >> 8];
|
||||||
+ (((i << 15) & JIT_MASK[0][i >> 8]) >> 1);
|
u32 size = ExeMemRegionSizes[kind];
|
||||||
|
|
||||||
|
AddrTranslate9[i] = ExeMemRegionOffsets[kind] + ((i << 15) & (size - 1));
|
||||||
|
}
|
||||||
for (int i = 0; i < 0x4000; i++)
|
for (int i = 0; i < 0x4000; i++)
|
||||||
cache.AddrMapping7[i] = JIT_MEM[1][i >> 9] == -1 ? NULL :
|
{
|
||||||
(CompiledBlock*)((u8*)&cache + JIT_MEM[1][i >> 9])
|
ExeMemKind kind = JIT_MEM[1][i >> 9];
|
||||||
+ (((i << 14) & JIT_MASK[1][i >> 9]) >> 1);
|
u32 size = ExeMemRegionSizes[kind];
|
||||||
|
|
||||||
|
AddrTranslate7[i] = ExeMemRegionOffsets[kind] + ((i << 14) & (size - 1));
|
||||||
|
}
|
||||||
|
|
||||||
compiler = new Compiler();
|
compiler = new Compiler();
|
||||||
}
|
}
|
||||||
@ -126,7 +141,7 @@ void DeInit()
|
|||||||
delete compiler;
|
delete compiler;
|
||||||
}
|
}
|
||||||
|
|
||||||
void floodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
|
void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
|
||||||
{
|
{
|
||||||
for (int j = start; j >= 0; j--)
|
for (int j = start; j >= 0; j--)
|
||||||
{
|
{
|
||||||
@ -144,7 +159,154 @@ void floodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
-CompiledBlock CompileBlock(ARM* cpu)
+bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, u32& targetAddr)
+{
+    if (thumb)
+    {
+        u32 r15 = instr.Addr + 4;
+        cond = 0xE;
+
+        if (instr.Info.Kind == ARMInstrInfo::tk_BL_LONG && !(instr.Instr & (1 << 12)))
+        {
+            targetAddr = r15 + ((s32)((instr.Instr & 0x7FF) << 21) >> 9);
+            targetAddr += ((instr.Instr >> 16) & 0x7FF) << 1;
+            return true;
+        }
+        else if (instr.Info.Kind == ARMInstrInfo::tk_B)
+        {
+            s32 offset = (s32)((instr.Instr & 0x7FF) << 21) >> 20;
+            targetAddr = r15 + offset;
+            return true;
+        }
+        else if (instr.Info.Kind == ARMInstrInfo::tk_BCOND)
+        {
+            cond = (instr.Instr >> 8) & 0xF;
+            s32 offset = (s32)(instr.Instr << 24) >> 23;
+            targetAddr = r15 + offset;
+            return true;
+        }
+    }
+    else
+    {
+        cond = instr.Cond();
+        if (instr.Info.Kind == ARMInstrInfo::ak_BL
+            || instr.Info.Kind == ARMInstrInfo::ak_B)
+        {
+            s32 offset = (s32)(instr.Instr << 8) >> 6;
+            u32 r15 = instr.Addr + 8;
+            targetAddr = r15 + offset;
+            return true;
+        }
+    }
+    return false;
+}
+
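The shift pairs in DecodeBranch are sign-extension tricks: shift the immediate up until its top bit sits in the sign bit, then arithmetic-shift back down so the scaling (x2 for Thumb, x4 for ARM) comes out in the same step. A self-contained check of the two simple cases:

#include <cassert>
#include <cstdint>

// Thumb B (unconditional): 11-bit immediate, in halfwords, sign-extended.
// (instr << 21) pushes bit 10 of the immediate into the sign bit,
// >> 20 arithmetic-shifts it back down and multiplies by 2.
static uint32_t ThumbBTarget(uint32_t pc, uint32_t instr)
{
    int32_t offset = (int32_t)((instr & 0x7FF) << 21) >> 20;
    return (pc + 4) + offset;                    // r15 reads as pc + 4 in Thumb
}

// ARM B/BL: 24-bit immediate, in words, sign-extended.
static uint32_t ArmBTarget(uint32_t pc, uint32_t instr)
{
    int32_t offset = (int32_t)(instr << 8) >> 6;
    return (pc + 8) + offset;                    // r15 reads as pc + 8 in ARM
}

int main()
{
    // Thumb "b ." (branch to itself): encoding 0xE7FE, offset -4.
    assert(ThumbBTarget(0x02000000, 0xE7FE) == 0x02000000);
    // ARM "b ." : encoding 0xEAFFFFFE, offset -8.
    assert(ArmBTarget(0x02000000, 0xEAFFFFFE) == 0x02000000);
}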
+bool IsIdleLoop(FetchedInstr* instrs, int instrsCount)
+{
+    // see https://github.com/dolphin-emu/dolphin/blob/master/Source/Core/Core/PowerPC/PPCAnalyst.cpp#L678
+    // it basically checks if one iteration of a loop depends on another
+    // the rules are quite simple
+
+    u16 regsWrittenTo = 0;
+    u16 regsDisallowedToWrite = 0;
+    for (int i = 0; i < instrsCount; i++)
+    {
+        //printf("instr %d %x regs(%x %x) %x %x\n", i, instrs[i].Instr, instrs[i].Info.DstRegs, instrs[i].Info.SrcRegs, regsWrittenTo, regsDisallowedToWrite);
+        if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem)
+            return false;
+        if (i < instrsCount - 1 && instrs[i].Info.Branches())
+            return false;
+
+        u16 srcRegs = instrs[i].Info.SrcRegs & ~(1 << 15);
+        u16 dstRegs = instrs[i].Info.DstRegs & ~(1 << 15);
+
+        regsDisallowedToWrite |= srcRegs & ~regsWrittenTo;
+
+        if (dstRegs & regsDisallowedToWrite)
+            return false;
+        regsWrittenTo |= dstRegs;
+    }
+    return true;
+}
+
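To make the rule concrete, here is a hypothetical re-implementation over a toy instruction record (the real check above also exempts r15 and only allows a branch as the last instruction): a loop body is "idle" if it never writes memory and never writes a register whose value is read before being written inside the body, so every iteration computes exactly the same thing.

#include <cstdint>
#include <vector>
#include <cassert>

// Toy stand-in for the decoded-instruction info used above.
struct ToyInstr
{
    uint16_t SrcRegs;     // bitmask of registers read
    uint16_t DstRegs;     // bitmask of registers written
    bool     WritesMem;
};

// Same dependence rule as IsIdleLoop, minus the r15/branch special cases.
static bool IsIdle(const std::vector<ToyInstr>& body)
{
    uint16_t written = 0, frozen = 0;
    for (const ToyInstr& ins : body)
    {
        if (ins.WritesMem)
            return false;
        frozen |= ins.SrcRegs & ~written;   // read before any write: must stay constant
        if (ins.DstRegs & frozen)
            return false;                   // loop carries a dependence, so not idle
        written |= ins.DstRegs;
    }
    return true;
}

int main()
{
    // ldr r0, [r1]; cmp r0, #0; beq loop  -> reads r1, writes r0: idle busy-wait.
    std::vector<ToyInstr> wait  = { {1u << 1, 1u << 0, false}, {1u << 0, 0, false} };
    // add r0, r0, #1; b loop              -> r0 read then rewritten: not idle.
    std::vector<ToyInstr> count = { {1u << 0, 1u << 0, false} };
    assert(IsIdle(wait) && !IsIdle(count));
}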
|
typedef void (*InterpreterFunc)(ARM* cpu);
|
||||||
|
|
||||||
|
#define F(x) &ARMInterpreter::A_##x
|
||||||
|
#define F_ALU(name, s) \
|
||||||
|
F(name##_REG_LSL_IMM##s), F(name##_REG_LSR_IMM##s), F(name##_REG_ASR_IMM##s), F(name##_REG_ROR_IMM##s), \
|
||||||
|
F(name##_REG_LSL_REG##s), F(name##_REG_LSR_REG##s), F(name##_REG_ASR_REG##s), F(name##_REG_ROR_REG##s), F(name##_IMM##s)
|
||||||
|
#define F_MEM_WB(name) \
|
||||||
|
F(name##_REG_LSL), F(name##_REG_LSR), F(name##_REG_ASR), F(name##_REG_ROR), F(name##_IMM), \
|
||||||
|
F(name##_POST_REG_LSL), F(name##_POST_REG_LSR), F(name##_POST_REG_ASR), F(name##_POST_REG_ROR), F(name##_POST_IMM)
|
||||||
|
#define F_MEM_HD(name) \
|
||||||
|
F(name##_REG), F(name##_IMM), F(name##_POST_REG), F(name##_POST_IMM)
|
||||||
|
InterpreterFunc InterpretARM[ARMInstrInfo::ak_Count] =
|
||||||
|
{
|
||||||
|
F_ALU(AND,), F_ALU(AND,_S),
|
||||||
|
F_ALU(EOR,), F_ALU(EOR,_S),
|
||||||
|
F_ALU(SUB,), F_ALU(SUB,_S),
|
||||||
|
F_ALU(RSB,), F_ALU(RSB,_S),
|
||||||
|
F_ALU(ADD,), F_ALU(ADD,_S),
|
||||||
|
F_ALU(ADC,), F_ALU(ADC,_S),
|
||||||
|
F_ALU(SBC,), F_ALU(SBC,_S),
|
||||||
|
F_ALU(RSC,), F_ALU(RSC,_S),
|
||||||
|
F_ALU(ORR,), F_ALU(ORR,_S),
|
||||||
|
F_ALU(MOV,), F_ALU(MOV,_S),
|
||||||
|
F_ALU(BIC,), F_ALU(BIC,_S),
|
||||||
|
F_ALU(MVN,), F_ALU(MVN,_S),
|
||||||
|
F_ALU(TST,),
|
||||||
|
F_ALU(TEQ,),
|
||||||
|
F_ALU(CMP,),
|
||||||
|
F_ALU(CMN,),
|
||||||
|
|
||||||
|
F(MUL), F(MLA), F(UMULL), F(UMLAL), F(SMULL), F(SMLAL), F(SMLAxy), F(SMLAWy), F(SMULWy), F(SMLALxy), F(SMULxy),
|
||||||
|
F(CLZ), F(QADD), F(QDADD), F(QSUB), F(QDSUB),
|
||||||
|
|
||||||
|
F_MEM_WB(STR),
|
||||||
|
F_MEM_WB(STRB),
|
||||||
|
F_MEM_WB(LDR),
|
||||||
|
F_MEM_WB(LDRB),
|
||||||
|
|
||||||
|
F_MEM_HD(STRH),
|
||||||
|
F_MEM_HD(LDRD),
|
||||||
|
F_MEM_HD(STRD),
|
||||||
|
F_MEM_HD(LDRH),
|
||||||
|
F_MEM_HD(LDRSB),
|
||||||
|
F_MEM_HD(LDRSH),
|
||||||
|
|
||||||
|
F(SWP), F(SWPB),
|
||||||
|
F(LDM), F(STM),
|
||||||
|
|
||||||
|
F(B), F(BL), F(BLX_IMM), F(BX), F(BLX_REG),
|
||||||
|
F(UNK), F(MSR_IMM), F(MSR_REG), F(MRS), F(MCR), F(MRC), F(SVC)
|
||||||
|
};
|
||||||
|
#undef F_ALU
|
||||||
|
#undef F_MEM_WB
|
||||||
|
#undef F_MEM_HD
|
||||||
|
#undef F
|
||||||
|
|
||||||
|
#define F(x) ARMInterpreter::T_##x
|
||||||
|
InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] =
|
||||||
|
{
|
||||||
|
F(LSL_IMM), F(LSR_IMM), F(ASR_IMM),
|
||||||
|
F(ADD_REG_), F(SUB_REG_), F(ADD_IMM_), F(SUB_IMM_),
|
||||||
|
F(MOV_IMM), F(CMP_IMM), F(ADD_IMM), F(SUB_IMM),
|
||||||
|
F(AND_REG), F(EOR_REG), F(LSL_REG), F(LSR_REG), F(ASR_REG),
|
||||||
|
F(ADC_REG), F(SBC_REG), F(ROR_REG), F(TST_REG), F(NEG_REG),
|
||||||
|
F(CMP_REG), F(CMN_REG), F(ORR_REG), F(MUL_REG), F(BIC_REG), F(MVN_REG),
|
||||||
|
F(ADD_HIREG), F(CMP_HIREG), F(MOV_HIREG),
|
||||||
|
F(ADD_PCREL), F(ADD_SPREL), F(ADD_SP),
|
||||||
|
F(LDR_PCREL), F(STR_REG), F(STRB_REG), F(LDR_REG), F(LDRB_REG), F(STRH_REG),
|
||||||
|
F(LDRSB_REG), F(LDRH_REG), F(LDRSH_REG), F(STR_IMM), F(LDR_IMM), F(STRB_IMM),
|
||||||
|
F(LDRB_IMM), F(STRH_IMM), F(LDRH_IMM), F(STR_SPREL), F(LDR_SPREL),
|
||||||
|
F(PUSH), F(POP), F(LDMIA), F(STMIA),
|
||||||
|
F(BCOND), F(BX), F(BLX_REG), F(B), F(BL_LONG_1), F(BL_LONG_2),
|
||||||
|
F(UNK), F(SVC),
|
||||||
|
NULL // BL_LONG psudo opcode
|
||||||
|
};
|
||||||
|
#undef F
|
||||||
|
|
||||||
|
void CompileBlock(ARM* cpu)
|
||||||
{
|
{
|
||||||
bool thumb = cpu->CPSR & 0x20;
|
bool thumb = cpu->CPSR & 0x20;
|
||||||
|
|
||||||
@ -153,17 +315,41 @@ CompiledBlock CompileBlock(ARM* cpu)
|
|||||||
if (Config::JIT_MaxBlockSize > 32)
|
if (Config::JIT_MaxBlockSize > 32)
|
||||||
Config::JIT_MaxBlockSize = 32;
|
Config::JIT_MaxBlockSize = 32;
|
||||||
|
|
||||||
|
u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4);
|
||||||
|
if (!(cpu->Num == 0
|
||||||
|
? IsMapped<0>(blockAddr)
|
||||||
|
: IsMapped<1>(blockAddr)))
|
||||||
|
{
|
||||||
|
printf("Trying to compile a block in unmapped memory: %x\n", blockAddr);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 pseudoPhysicalAddr = cpu->Num == 0
|
||||||
|
? TranslateAddr<0>(blockAddr)
|
||||||
|
: TranslateAddr<1>(blockAddr);
|
||||||
|
|
||||||
FetchedInstr instrs[Config::JIT_MaxBlockSize];
|
FetchedInstr instrs[Config::JIT_MaxBlockSize];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4);
|
|
||||||
u32 r15 = cpu->R[15];
|
u32 r15 = cpu->R[15];
|
||||||
|
|
||||||
|
u32 addresseRanges[32] = {};
|
||||||
|
u32 numAddressRanges = 0;
|
||||||
|
|
||||||
cpu->FillPipeline();
|
cpu->FillPipeline();
|
||||||
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
|
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
|
||||||
u32 nextInstrAddr[2] = {blockAddr, r15};
|
u32 nextInstrAddr[2] = {blockAddr, r15};
|
||||||
|
|
||||||
|
JIT_DEBUGPRINT("start block %x (%x) %p %p (region invalidates %dx)\n",
|
||||||
|
blockAddr, pseudoPhysicalAddr, FastBlockAccess[pseudoPhysicalAddr / 2],
|
||||||
|
cpu->Num == 0 ? LookUpBlock<0>(blockAddr) : LookUpBlock<1>(blockAddr),
|
||||||
|
CodeRanges[pseudoPhysicalAddr / 256].TimesInvalidated);
|
||||||
|
|
||||||
|
u32 lastSegmentStart = blockAddr;
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
r15 += thumb ? 2 : 4;
|
r15 += thumb ? 2 : 4;
|
||||||
|
|
||||||
|
instrs[i].BranchFlags = 0;
|
||||||
instrs[i].SetFlags = 0;
|
instrs[i].SetFlags = 0;
|
||||||
instrs[i].Instr = nextInstr[0];
|
instrs[i].Instr = nextInstr[0];
|
||||||
instrs[i].NextInstr[0] = nextInstr[0] = nextInstr[1];
|
instrs[i].NextInstr[0] = nextInstr[0] = nextInstr[1];
|
||||||
@ -171,6 +357,25 @@ CompiledBlock CompileBlock(ARM* cpu)
|
|||||||
instrs[i].Addr = nextInstrAddr[0];
|
instrs[i].Addr = nextInstrAddr[0];
|
||||||
nextInstrAddr[0] = nextInstrAddr[1];
|
nextInstrAddr[0] = nextInstrAddr[1];
|
||||||
nextInstrAddr[1] = r15;
|
nextInstrAddr[1] = r15;
|
||||||
|
JIT_DEBUGPRINT("instr %08x %x\n", instrs[i].Instr & (thumb ? 0xFFFF : ~0), instrs[i].Addr);
|
||||||
|
|
||||||
|
u32 translatedAddr = (cpu->Num == 0
|
||||||
|
? TranslateAddr<0>(instrs[i].Addr)
|
||||||
|
: TranslateAddr<1>(instrs[i].Addr)) & ~0xFF;
|
||||||
|
if (i == 0 || translatedAddr != addresseRanges[numAddressRanges - 1])
|
||||||
|
{
|
||||||
|
bool returning = false;
|
||||||
|
for (int j = 0; j < numAddressRanges; j++)
|
||||||
|
{
|
||||||
|
if (addresseRanges[j] == translatedAddr)
|
||||||
|
{
|
||||||
|
returning = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!returning)
|
||||||
|
addresseRanges[numAddressRanges++] = translatedAddr;
|
||||||
|
}
|
||||||
|
|
||||||
if (cpu->Num == 0)
|
if (cpu->Num == 0)
|
||||||
{
|
{
|
||||||
@ -198,6 +403,34 @@ CompiledBlock CompileBlock(ARM* cpu)
|
|||||||
instrs[i].NextInstr[1] = nextInstr[1];
|
instrs[i].NextInstr[1] = nextInstr[1];
|
||||||
instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr);
|
instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr);
|
||||||
|
|
||||||
|
cpu->R[15] = r15;
|
||||||
|
cpu->CurInstr = instrs[i].Instr;
|
||||||
|
cpu->CodeCycles = instrs[i].CodeCycles;
|
||||||
|
|
||||||
|
if (thumb)
|
||||||
|
{
|
||||||
|
InterpretTHUMB[instrs[i].Info.Kind](cpu);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (cpu->Num == 0 && instrs[i].Info.Kind == ARMInstrInfo::ak_BLX_IMM)
|
||||||
|
{
|
||||||
|
ARMInterpreter::A_BLX_IMM(cpu);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
u32 icode = ((instrs[i].Instr >> 4) & 0xF) | ((instrs[i].Instr >> 16) & 0xFF0);
|
||||||
|
assert(InterpretARM[instrs[i].Info.Kind] == ARMInterpreter::ARMInstrTable[icode] || instrs[i].Info.Kind == ARMInstrInfo::ak_MOV_REG_LSL_IMM);
|
||||||
|
if (cpu->CheckCondition(instrs[i].Cond()))
|
||||||
|
InterpretARM[instrs[i].Info.Kind](cpu);
|
||||||
|
else
|
||||||
|
cpu->AddCycles_C();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
instrs[i].DataCycles = cpu->DataCycles;
|
||||||
|
instrs[i].DataRegion = cpu->DataRegion;
|
||||||
|
|
||||||
if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0
|
if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0
|
||||||
&& instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1)
|
&& instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1)
|
||||||
{
|
{
|
||||||
@ -208,40 +441,340 @@ CompiledBlock CompileBlock(ARM* cpu)
|
|||||||
instrs[i - 1].Info.EndBlock = true;
|
instrs[i - 1].Info.EndBlock = true;
|
||||||
i--;
|
i--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (instrs[i].Info.Branches() && Config::JIT_BrancheOptimisations)
|
||||||
|
{
|
||||||
|
bool hasBranched = cpu->R[15] != r15;
|
||||||
|
|
||||||
|
u32 cond, target;
|
||||||
|
bool staticBranch = DecodeBranch(thumb, instrs[i], cond, target);
|
||||||
|
JIT_DEBUGPRINT("branch cond %x target %x (%d)\n", cond, target, hasBranched);
|
||||||
|
|
||||||
|
if (staticBranch)
|
||||||
|
{
|
||||||
|
bool isBackJump = false;
|
||||||
|
if (hasBranched)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < i; j++)
|
||||||
|
{
|
||||||
|
if (instrs[i].Addr == target)
|
||||||
|
{
|
||||||
|
isBackJump = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cond < 0xE && target < instrs[i].Addr && target >= lastSegmentStart)
|
||||||
|
{
|
||||||
|
// we might have an idle loop
|
||||||
|
u32 offset = (target - blockAddr) / (thumb ? 2 : 4);
|
||||||
|
if (IsIdleLoop(instrs + offset, i - offset + 1))
|
||||||
|
{
|
||||||
|
instrs[i].BranchFlags |= branch_IdleBranch;
|
||||||
|
JIT_DEBUGPRINT("found %s idle loop %d in block %x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (hasBranched && (!thumb || cond == 0xE) && !isBackJump && i + 1 < Config::JIT_MaxBlockSize)
|
||||||
|
{
|
||||||
|
u32 targetPseudoPhysical = cpu->Num == 0
|
||||||
|
? TranslateAddr<0>(target)
|
||||||
|
: TranslateAddr<1>(target);
|
||||||
|
|
||||||
|
r15 = target + (thumb ? 2 : 4);
|
||||||
|
assert(r15 == cpu->R[15]);
|
||||||
|
|
||||||
|
JIT_DEBUGPRINT("block lengthened by static branch (target %x)\n", target);
|
||||||
|
|
||||||
|
nextInstr[0] = cpu->NextInstr[0];
|
||||||
|
nextInstr[1] = cpu->NextInstr[1];
|
||||||
|
|
||||||
|
nextInstrAddr[0] = target;
|
||||||
|
nextInstrAddr[1] = r15;
|
||||||
|
|
||||||
|
lastSegmentStart = target;
|
||||||
|
|
||||||
|
instrs[i].Info.EndBlock = false;
|
||||||
|
|
||||||
|
if (cond < 0xE)
|
||||||
|
instrs[i].BranchFlags |= branch_FollowCondTaken;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!hasBranched && cond < 0xE && i + 1 < Config::JIT_MaxBlockSize)
|
||||||
|
{
|
||||||
|
instrs[i].Info.EndBlock = false;
|
||||||
|
instrs[i].BranchFlags |= branch_FollowCondNotTaken;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
|
|
||||||
bool canCompile = compiler->CanCompile(thumb, instrs[i - 1].Info.Kind);
|
bool canCompile = compiler->CanCompile(thumb, instrs[i - 1].Info.Kind);
|
||||||
if (instrs[i - 1].Info.ReadFlags != 0 || !canCompile)
|
bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken));
|
||||||
floodFillSetFlags(instrs, i - 2, canCompile ? instrs[i - 1].Info.ReadFlags : 0xF);
|
if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond)
|
||||||
} while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize);
|
FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF);
|
||||||
|
} while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted);
|
||||||
|
|
||||||
floodFillSetFlags(instrs, i - 1, 0xF);
|
u32 restoreSlot = HashRestoreCandidate(pseudoPhysicalAddr);
|
||||||
|
JitBlock* prevBlock = RestoreCandidates[restoreSlot];
|
||||||
|
bool mayRestore = true;
|
||||||
|
if (prevBlock && prevBlock->PseudoPhysicalAddr == pseudoPhysicalAddr)
|
||||||
|
{
|
||||||
|
RestoreCandidates[restoreSlot] = NULL;
|
||||||
|
if (prevBlock->NumInstrs == i)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < i; j++)
|
||||||
|
{
|
||||||
|
if (prevBlock->Instrs()[j] != instrs[j].Instr)
|
||||||
|
{
|
||||||
|
mayRestore = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
mayRestore = false;
|
||||||
|
|
||||||
CompiledBlock block = compiler->CompileBlock(cpu, instrs, i);
|
if (prevBlock->NumAddresses == numAddressRanges)
|
||||||
|
{
|
||||||
if (cpu->Num == 0)
|
for (int j = 0; j < numAddressRanges; j++)
|
||||||
InsertBlock<0>(blockAddr, block);
|
{
|
||||||
|
if (prevBlock->AddressRanges()[j] != addresseRanges[j])
|
||||||
|
{
|
||||||
|
mayRestore = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
mayRestore = false;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
InsertBlock<1>(blockAddr, block);
|
{
|
||||||
|
mayRestore = false;
|
||||||
|
prevBlock = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
return block;
|
JitBlock* block;
|
||||||
|
if (!mayRestore)
|
||||||
|
{
|
||||||
|
if (prevBlock)
|
||||||
|
delete prevBlock;
|
||||||
|
|
||||||
|
block = new JitBlock(i, numAddressRanges);
|
||||||
|
for (int j = 0; j < i; j++)
|
||||||
|
block->Instrs()[j] = instrs[j].Instr;
|
||||||
|
for (int j = 0; j < numAddressRanges; j++)
|
||||||
|
block->AddressRanges()[j] = addresseRanges[j];
|
||||||
|
|
||||||
|
block->StartAddr = blockAddr;
|
||||||
|
block->PseudoPhysicalAddr = pseudoPhysicalAddr;
|
||||||
|
|
||||||
|
FloodFillSetFlags(instrs, i - 1, 0xF);
|
||||||
|
|
||||||
|
block->EntryPoint = compiler->CompileBlock(cpu, thumb, instrs, i);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
JIT_DEBUGPRINT("restored! %p\n", prevBlock);
|
||||||
|
block = prevBlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = 0; j < numAddressRanges; j++)
|
||||||
|
{
|
||||||
|
assert(addresseRanges[j] == block->AddressRanges()[j]);
|
||||||
|
CodeRanges[addresseRanges[j] / 256].Blocks.Add(block);
|
||||||
|
}
|
||||||
|
|
||||||
|
FastBlockAccess[block->PseudoPhysicalAddr / 2] = block->EntryPoint;
|
||||||
|
|
||||||
|
JitBlocks.Add(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
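The restore-candidate path in CompileBlock above avoids recompiling after an invalidation when the code hasn't actually changed: the previously compiled block is parked in a hash bucket, and the next compile of the same pseudo physical address reuses it if the instruction words (and, in the real code, the address ranges) still match. A trimmed, hypothetical sketch:

#include <cstdint>
#include <cstring>

// Hypothetical trimmed-down block record; the real JitBlock also stores
// its address ranges and the compiled entry point.
struct Block
{
    uint32_t Addr;
    uint32_t NumInstrs;
    uint32_t Instrs[32];           // up to the JIT's max block size (32 here)
};

static Block* Candidates[0x800];   // bucket array sized to the 11-bit hash below

static uint32_t Hash(uint32_t addr)
{
    return (uint32_t)(((uint64_t)addr * 11400714819323198485llu) >> 53);
}

// When recompiling a region, check whether the invalidated block that used
// to live here is word-for-word identical; if so, reuse it and skip the
// expensive recompilation.
static Block* TryRestore(uint32_t addr, const uint32_t* instrs, uint32_t count)
{
    uint32_t slot = Hash(addr);
    Block* prev = Candidates[slot];
    if (!prev || prev->Addr != addr || prev->NumInstrs != count)
        return nullptr;
    if (std::memcmp(prev->Instrs, instrs, count * sizeof(uint32_t)) != 0)
        return nullptr;
    Candidates[slot] = nullptr;    // taken back out of the candidate cache
    return prev;                   // caller reuses the old compiled code
}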
void InvalidateBlockCache()
|
void InvalidateByAddr(u32 pseudoPhysical)
|
||||||
|
{
|
||||||
|
JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical);
|
||||||
|
AddressRange* range = &CodeRanges[pseudoPhysical / 256];
|
||||||
|
int startLength = range->Blocks.Length;
|
||||||
|
for (int i = 0; i < range->Blocks.Length; i++)
|
||||||
|
{
|
||||||
|
assert(range->Blocks.Length == startLength);
|
||||||
|
JitBlock* block = range->Blocks[i];
|
||||||
|
for (int j = 0; j < block->NumAddresses; j++)
|
||||||
|
{
|
||||||
|
u32 addr = block->AddressRanges()[j];
|
||||||
|
if ((addr / 256) != (pseudoPhysical / 256))
|
||||||
|
{
|
||||||
|
AddressRange* otherRange = &CodeRanges[addr / 256];
|
||||||
|
assert(otherRange != range);
|
||||||
|
assert(otherRange->Blocks.RemoveByValue(block));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(JitBlocks.RemoveByValue(block));
|
||||||
|
|
||||||
|
FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
|
||||||
|
|
||||||
|
u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
|
||||||
|
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
|
||||||
|
delete RestoreCandidates[slot];
|
||||||
|
|
||||||
|
RestoreCandidates[slot] = block;
|
||||||
|
}
|
||||||
|
if ((range->TimesInvalidated + 1) > range->TimesInvalidated)
|
||||||
|
range->TimesInvalidated++;
|
||||||
|
|
||||||
|
range->Blocks.Clear();
|
||||||
|
}
|
||||||
|
|
||||||
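InvalidateByAddr is what makes invalidation detection "more reliable": compiled code is tracked per 256-byte range of the pseudo physical space, and a write into a range kills every block overlapping it, wherever that block starts. A hypothetical miniature of the bookkeeping:

#include <cstdint>
#include <vector>
#include <algorithm>

static constexpr uint32_t SpaceSize = 1 << 20;   // toy pseudo physical space

// Each 256-byte range remembers which compiled blocks contain code from it.
struct MiniBlock
{
    uint32_t EntryAddr;               // pseudo physical address of the block
    std::vector<uint32_t> Ranges;     // all 256-byte ranges it touches
};

static std::vector<MiniBlock*> RangeBlocks[SpaceSize / 256];
static void*                   FastDispatch[SpaceSize / 2];

static void InvalidateByAddr(uint32_t pseudoPhysical)
{
    auto& here = RangeBlocks[pseudoPhysical / 256];
    for (MiniBlock* block : here)
    {
        // Unlink the block from every other range it spans...
        for (uint32_t r : block->Ranges)
            if (r / 256 != pseudoPhysical / 256)
            {
                auto& other = RangeBlocks[r / 256];
                other.erase(std::remove(other.begin(), other.end(), block), other.end());
            }
        // ...and make sure the dispatcher can no longer jump into it.
        FastDispatch[block->EntryAddr / 2] = nullptr;
    }
    here.clear();                     // a write here kills every overlapping block
}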
|
void InvalidateByAddr7(u32 addr)
|
||||||
|
{
|
||||||
|
u32 pseudoPhysical = TranslateAddr<1>(addr);
|
||||||
|
if (__builtin_expect(CodeRanges[pseudoPhysical / 256].Blocks.Length > 0, false))
|
||||||
|
InvalidateByAddr(pseudoPhysical);
|
||||||
|
}
|
||||||
|
|
||||||
|
void InvalidateITCM(u32 addr)
|
||||||
|
{
|
||||||
|
u32 pseudoPhysical = addr + ExeMemRegionOffsets[exeMem_ITCM];
|
||||||
|
if (CodeRanges[pseudoPhysical / 256].Blocks.Length > 0)
|
||||||
|
InvalidateByAddr(pseudoPhysical);
|
||||||
|
}
|
||||||
|
|
||||||
|
void InvalidateAll()
|
||||||
|
{
|
||||||
|
JIT_DEBUGPRINT("invalidating all %x\n", JitBlocks.Length);
|
||||||
|
for (int i = 0; i < JitBlocks.Length; i++)
|
||||||
|
{
|
||||||
|
JitBlock* block = JitBlocks[i];
|
||||||
|
|
||||||
|
FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
|
||||||
|
|
||||||
|
for (int j = 0; j < block->NumAddresses; j++)
|
||||||
|
{
|
||||||
|
u32 addr = block->AddressRanges()[j];
|
||||||
|
AddressRange* range = &CodeRanges[addr / 256];
|
||||||
|
range->Blocks.Clear();
|
||||||
|
if (range->TimesInvalidated + 1 > range->TimesInvalidated)
|
||||||
|
range->TimesInvalidated++;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
|
||||||
|
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
|
||||||
|
delete RestoreCandidates[slot];
|
||||||
|
|
||||||
|
RestoreCandidates[slot] = block;
|
||||||
|
}
|
||||||
|
|
||||||
|
JitBlocks.Clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ResetBlockCache()
|
||||||
{
|
{
|
||||||
printf("Resetting JIT block cache...\n");
|
printf("Resetting JIT block cache...\n");
|
||||||
|
|
||||||
memset(cache.MainRAM, 0, sizeof(cache.MainRAM));
|
memset(FastBlockAccess, 0, sizeof(FastBlockAccess));
|
||||||
memset(cache.SWRAM, 0, sizeof(cache.SWRAM));
|
for (int i = 0; i < sizeof(RestoreCandidates)/sizeof(RestoreCandidates[0]); i++)
|
||||||
memset(cache.ARM9_BIOS, 0, sizeof(cache.ARM9_BIOS));
|
{
|
||||||
memset(cache.ARM9_ITCM, 0, sizeof(cache.ARM9_ITCM));
|
if (RestoreCandidates[i])
|
||||||
memset(cache.ARM9_LCDC, 0, sizeof(cache.ARM9_LCDC));
|
{
|
||||||
memset(cache.ARM7_BIOS, 0, sizeof(cache.ARM7_BIOS));
|
delete RestoreCandidates[i];
|
||||||
memset(cache.ARM7_WRAM, 0, sizeof(cache.ARM7_WRAM));
|
RestoreCandidates[i] = NULL;
|
||||||
memset(cache.ARM7_WVRAM, 0, sizeof(cache.ARM7_WVRAM));
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < JitBlocks.Length; i++)
|
||||||
|
{
|
||||||
|
JitBlock* block = JitBlocks[i];
|
||||||
|
for (int j = 0; j < block->NumAddresses; j++)
|
||||||
|
{
|
||||||
|
u32 addr = block->AddressRanges()[j];
|
||||||
|
CodeRanges[addr / 256].Blocks.Clear();
|
||||||
|
CodeRanges[addr / 256].TimesInvalidated = 0;
|
||||||
|
}
|
||||||
|
delete block;
|
||||||
|
}
|
||||||
|
JitBlocks.Clear();
|
||||||
|
|
||||||
compiler->Reset();
|
compiler->Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
|
||||||
|
{
|
||||||
|
if (cpu->Num == 0)
|
||||||
|
{
|
||||||
|
if ((addr & 0xFF000000) == 0x04000000)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
unfortunately we can't map GPU2D this way
|
||||||
|
since it's hidden inside an object
|
||||||
|
|
||||||
|
though GPU3D registers are accessed much more intensive
|
||||||
|
*/
|
||||||
|
if (addr >= 0x04000320 && addr < 0x040006A4)
|
||||||
|
{
|
||||||
|
switch (size | store)
|
||||||
|
{
|
||||||
|
case 8: return (void*)GPU3D::Read8;
|
||||||
|
case 9: return (void*)GPU3D::Write8;
|
||||||
|
case 16: return (void*)GPU3D::Read16;
|
||||||
|
case 17: return (void*)GPU3D::Write16;
|
||||||
|
case 32: return (void*)GPU3D::Read32;
|
||||||
|
case 33: return (void*)GPU3D::Write32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (size | store)
|
||||||
|
{
|
||||||
|
case 8: return (void*)NDS::ARM9IORead8;
|
||||||
|
case 9: return (void*)NDS::ARM9IOWrite8;
|
||||||
|
case 16: return (void*)NDS::ARM9IORead16;
|
||||||
|
case 17: return (void*)NDS::ARM9IOWrite16;
|
||||||
|
case 32: return (void*)NDS::ARM9IORead32;
|
||||||
|
case 33: return (void*)NDS::ARM9IOWrite32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
switch (addr & 0xFF800000)
|
||||||
|
{
|
||||||
|
case 0x04000000:
|
||||||
|
if (addr >= 0x04000400 && addr < 0x04000520)
|
||||||
|
{
|
||||||
|
switch (size | store)
|
||||||
|
{
|
||||||
|
case 8: return (void*)SPU::Read8;
|
||||||
|
case 9: return (void*)SPU::Write8;
|
||||||
|
case 16: return (void*)SPU::Read16;
|
||||||
|
case 17: return (void*)SPU::Write16;
|
||||||
|
case 32: return (void*)SPU::Read32;
|
||||||
|
case 33: return (void*)SPU::Write32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (size | store)
|
||||||
|
{
|
||||||
|
case 8: return (void*)NDS::ARM7IORead8;
|
||||||
|
case 9: return (void*)NDS::ARM7IOWrite8;
|
||||||
|
case 16: return (void*)NDS::ARM7IORead16;
|
||||||
|
case 17: return (void*)NDS::ARM7IOWrite16;
|
||||||
|
case 32: return (void*)NDS::ARM7IORead32;
|
||||||
|
case 33: return (void*)NDS::ARM7IOWrite32;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 0x04800000:
|
||||||
|
if (addr < 0x04810000 && size == 16)
|
||||||
|
{
|
||||||
|
if (store)
|
||||||
|
return (void*)Wifi::Write;
|
||||||
|
else
|
||||||
|
return (void*)Wifi::Read;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
141  src/ARMJIT.h
@ -9,142 +9,67 @@
|
|||||||
namespace ARMJIT
|
namespace ARMJIT
|
||||||
{
|
{
|
||||||
|
|
||||||
typedef u32 (*CompiledBlock)();
|
enum ExeMemKind
|
||||||
|
|
||||||
struct FetchedInstr
|
|
||||||
{
|
{
|
||||||
u32 A_Reg(int pos) const
|
exeMem_Unmapped = 0,
|
||||||
{
|
exeMem_ITCM,
|
||||||
return (Instr >> pos) & 0xF;
|
exeMem_MainRAM,
|
||||||
}
|
exeMem_SWRAM,
|
||||||
|
exeMem_LCDC,
|
||||||
u32 T_Reg(int pos) const
|
exeMem_ARM9_BIOS,
|
||||||
{
|
exeMem_ARM7_BIOS,
|
||||||
return (Instr >> pos) & 0x7;
|
exeMem_ARM7_WRAM,
|
||||||
}
|
exeMem_ARM7_WVRAM,
|
||||||
|
exeMem_Count
|
||||||
u32 Cond() const
|
|
||||||
{
|
|
||||||
return Instr >> 28;
|
|
||||||
}
|
|
||||||
|
|
||||||
u8 SetFlags;
|
|
||||||
u32 Instr;
|
|
||||||
u32 NextInstr[2];
|
|
||||||
u32 Addr;
|
|
||||||
|
|
||||||
u8 CodeCycles;
|
|
||||||
|
|
||||||
ARMInstrInfo::Info Info;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
extern const u32 ExeMemRegionOffsets[];
|
||||||
Copied from DeSmuME
|
extern const u32 ExeMemRegionSizes[];
|
||||||
Some names where changed to match the nomenclature of melonDS
|
|
||||||
|
|
||||||
Since it's nowhere explained and atleast I needed some time to get behind it,
|
typedef u32 (*JitBlockEntry)();
|
||||||
here's a summary on how it works:
|
|
||||||
more or less all memory locations from which code can be executed are
|
|
||||||
represented by an array of function pointers, which point to null or
|
|
||||||
a function which executes a block instructions starting from there.
|
|
||||||
|
|
||||||
The most significant 4 bits of each address is ignored. This 28 bit space is
|
extern u32 AddrTranslate9[0x2000];
|
||||||
divided into 0x2000 32 KB for ARM9 and 0x4000 16 KB for ARM7, each of which
|
extern u32 AddrTranslate7[0x4000];
|
||||||
a pointer to the relevant place inside the afore mentioned arrays. 32 and 16 KB
|
|
||||||
are the sizes of the smallest contigous memory region mapped to the respective CPU.
|
|
||||||
Because ARM addresses are always aligned to 4 bytes and Thumb to a 2 byte boundary,
|
|
||||||
we only need every second half word to be adressable.
|
|
||||||
|
|
||||||
In case a memory write hits mapped memory, the function block at this
|
const u32 ExeMemSpaceSize = 0x518000; // I hate you C++, sometimes I really hate you...
|
||||||
address is set to null, so it's recompiled the next time it's executed.
|
extern JitBlockEntry FastBlockAccess[ExeMemSpaceSize / 2];
|
||||||
|
|
||||||
This method has disadvantages, namely that only writing to the
|
|
||||||
first instruction of a block marks it as invalid and that memory remapping
|
|
||||||
(SWRAM and VRAM) isn't taken into account.
|
|
||||||
*/
|
|
||||||
|
|
||||||
struct BlockCache
|
|
||||||
{
|
|
||||||
CompiledBlock* AddrMapping9[0x2000] = {0};
|
|
||||||
CompiledBlock* AddrMapping7[0x4000] = {0};
|
|
||||||
|
|
||||||
CompiledBlock MainRAM[4*1024*1024/2];
|
|
||||||
CompiledBlock SWRAM[0x8000/2]; // Shared working RAM
|
|
||||||
CompiledBlock ARM9_ITCM[0x8000/2];
|
|
||||||
CompiledBlock ARM9_LCDC[0xA4000/2];
|
|
||||||
CompiledBlock ARM9_BIOS[0x8000/2];
|
|
||||||
CompiledBlock ARM7_BIOS[0x4000/2];
|
|
||||||
CompiledBlock ARM7_WRAM[0x10000/2]; // dedicated ARM7 WRAM
|
|
||||||
CompiledBlock ARM7_WVRAM[0x40000/2]; // VRAM allocated as Working RAM
|
|
||||||
};
|
|
||||||
|
|
||||||
extern BlockCache cache;
|
|
||||||
|
|
||||||
template <u32 num>
|
template <u32 num>
|
||||||
inline bool IsMapped(u32 addr)
|
inline bool IsMapped(u32 addr)
|
||||||
{
|
{
|
||||||
if (num == 0)
|
if (num == 0)
|
||||||
return cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15];
|
return AddrTranslate9[(addr & 0xFFFFFFF) >> 15] >= ExeMemRegionSizes[exeMem_Unmapped];
|
||||||
else
|
else
|
||||||
return cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14];
|
return AddrTranslate7[(addr & 0xFFFFFFF) >> 14] >= ExeMemRegionSizes[exeMem_Unmapped];
|
||||||
}
|
}
|
||||||
|
|
||||||
template <u32 num>
|
template <u32 num>
|
||||||
inline CompiledBlock LookUpBlock(u32 addr)
|
inline u32 TranslateAddr(u32 addr)
|
||||||
{
|
{
|
||||||
if (num == 0)
|
if (num == 0)
|
||||||
return cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15][(addr & 0x7FFF) >> 1];
|
return AddrTranslate9[(addr & 0xFFFFFFF) >> 15] + (addr & 0x7FFF);
|
||||||
else
|
else
|
||||||
return cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1];
|
return AddrTranslate7[(addr & 0xFFFFFFF) >> 14] + (addr & 0x3FFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <u32 num>
|
template <u32 num>
|
||||||
inline void Invalidate16(u32 addr)
|
inline JitBlockEntry LookUpBlock(u32 addr)
|
||||||
{
|
{
|
||||||
if (IsMapped<num>(addr))
|
return FastBlockAccess[TranslateAddr<num>(addr) / 2];
|
||||||
{
|
|
||||||
if (num == 0)
|
|
||||||
cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15][(addr & 0x7FFF) >> 1] = NULL;
|
|
||||||
else
|
|
||||||
cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1] = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <u32 num>
|
|
||||||
inline void Invalidate32(u32 addr)
|
|
||||||
{
|
|
||||||
if (IsMapped<num>(addr))
|
|
||||||
{
|
|
||||||
if (num == 0)
|
|
||||||
{
|
|
||||||
CompiledBlock* page = cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15];
|
|
||||||
page[(addr & 0x7FFF) >> 1] = NULL;
|
|
||||||
page[((addr + 2) & 0x7FFF) >> 1] = NULL;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
CompiledBlock* page = cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14];
|
|
||||||
page[(addr & 0x3FFF) >> 1] = NULL;
|
|
||||||
page[((addr + 2) & 0x3FFF) >> 1] = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <u32 num>
|
|
||||||
inline void InsertBlock(u32 addr, CompiledBlock func)
|
|
||||||
{
|
|
||||||
if (num == 0)
|
|
||||||
cache.AddrMapping9[(addr & 0xFFFFFFF) >> 15][(addr & 0x7FFF) >> 1] = func;
|
|
||||||
else
|
|
||||||
cache.AddrMapping7[(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1] = func;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Init();
|
void Init();
|
||||||
void DeInit();
|
void DeInit();
|
||||||
|
|
||||||
CompiledBlock CompileBlock(ARM* cpu);
|
void InvalidateByAddr(u32 pseudoPhysical);
|
||||||
|
void InvalidateAll();
|
||||||
|
|
||||||
void InvalidateBlockCache();
|
void InvalidateITCM(u32 addr);
|
||||||
|
void InvalidateByAddr7(u32 addr);
|
||||||
|
|
||||||
|
void CompileBlock(ARM* cpu);
|
||||||
|
|
||||||
|
void ResetBlockCache();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
198  src/ARMJIT_Internal.h (new file)
@ -0,0 +1,198 @@
|
|||||||
|
#ifndef ARMJIT_INTERNAL_H
|
||||||
|
#define ARMJIT_INTERNAL_H
|
||||||
|
|
||||||
|
#include "types.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include "ARMJIT.h"
|
||||||
|
|
||||||
|
// here lands everything which doesn't fit into ARMJIT.h
|
||||||
|
// where it would be included by pretty much everything
|
||||||
|
namespace ARMJIT
|
||||||
|
{
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
branch_IdleBranch = 1 << 0,
|
||||||
|
branch_FollowCondTaken = 1 << 1,
|
||||||
|
branch_FollowCondNotTaken = 1 << 2
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FetchedInstr
|
||||||
|
{
|
||||||
|
u32 A_Reg(int pos) const
|
||||||
|
{
|
||||||
|
return (Instr >> pos) & 0xF;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 T_Reg(int pos) const
|
||||||
|
{
|
||||||
|
return (Instr >> pos) & 0x7;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 Cond() const
|
||||||
|
{
|
||||||
|
return Instr >> 28;
|
||||||
|
}
|
||||||
|
|
||||||
|
u8 BranchFlags;
|
||||||
|
u8 SetFlags;
|
||||||
|
u32 Instr;
|
||||||
|
u32 NextInstr[2];
|
||||||
|
u32 Addr;
|
||||||
|
|
||||||
|
u8 CodeCycles;
|
||||||
|
u8 DataCycles;
|
||||||
|
u8 DataRegion;
|
||||||
|
|
||||||
|
ARMInstrInfo::Info Info;
|
||||||
|
};
|
||||||
|
|
||||||
+/*
+    TinyVector
+    - because reinventing the wheel is the best!
+
+    - meant to be used very often, with not so many elements
+      max 1 << 16 elements
+    - doesn't allocate while no elements are inserted
+    - not stl confirmant of course
+    - probably only works with POD types
+    - remove operations don't preserve order, but O(1)!
+*/
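A short usage sketch, assuming the melonDS source tree and the TinyVector interface declared in this header; the one thing to keep in mind is that removal swaps the last element into the hole, so ordering is not preserved:

#include <cstdio>
#include "ARMJIT_Internal.h"    // this header, for ARMJIT::TinyVector

int main()
{
    ARMJIT::TinyVector<int> v;
    v.Add(3); v.Add(5); v.Add(7);
    v.RemoveByValue(5);              // swap-remove: order not preserved
    for (int i = 0; i < (int)v.Length; i++)
        std::printf("%d\n", v[i]);   // prints 3 and 7 in some order
}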
|
template <typename T>
|
||||||
|
struct __attribute__((packed)) TinyVector
|
||||||
|
{
|
||||||
|
T* Data = NULL;
|
||||||
|
u16 Capacity = 0;
|
||||||
|
u32 Length = 0; // make it 32 bit so we don't need movzx
|
||||||
|
|
||||||
|
~TinyVector()
|
||||||
|
{
|
||||||
|
delete[] Data;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MakeCapacity(u32 capacity)
|
||||||
|
{
|
||||||
|
assert(capacity <= UINT16_MAX);
|
||||||
|
assert(capacity > Capacity);
|
||||||
|
T* newMem = new T[capacity];
|
||||||
|
if (Data != NULL)
|
||||||
|
memcpy(newMem, Data, sizeof(Data) * Length);
|
||||||
|
|
||||||
|
T* oldData = Data;
|
||||||
|
Data = newMem;
|
||||||
|
if (oldData != NULL)
|
||||||
|
delete[] oldData;
|
||||||
|
|
||||||
|
Capacity = capacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clear()
|
||||||
|
{
|
||||||
|
Length = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Add(T element)
|
||||||
|
{
|
||||||
|
assert(Length + 1 <= UINT16_MAX);
|
||||||
|
if (Length + 1 > Capacity)
|
||||||
|
MakeCapacity(((Capacity + 4) * 3) / 2);
|
||||||
|
|
||||||
|
Data[Length++] = element;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Remove(int index)
|
||||||
|
{
|
||||||
|
assert(index >= 0 && index < Length);
|
||||||
|
|
||||||
|
Length--;
|
||||||
|
Data[index] = Data[Length];
|
||||||
|
/*for (int i = index; i < Length; i++)
|
||||||
|
Data[i] = Data[i + 1];*/
|
||||||
|
}
|
||||||
|
|
||||||
|
int Find(T needle)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < Length; i++)
|
||||||
|
{
|
||||||
|
if (Data[i] == needle)
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool RemoveByValue(T needle)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < Length; i++)
|
||||||
|
{
|
||||||
|
if (Data[i] == needle)
|
||||||
|
{
|
||||||
|
Remove(i);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
T& operator[](int index)
|
||||||
|
{
|
||||||
|
assert(index >= 0 && index < Length);
|
||||||
|
return Data[index];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class JitBlock
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
JitBlock(u32 numInstrs, u32 numAddresses)
|
||||||
|
{
|
||||||
|
NumInstrs = numInstrs;
|
||||||
|
NumAddresses = numAddresses;
|
||||||
|
Data = new u32[numInstrs + numAddresses];
|
||||||
|
}
|
||||||
|
|
||||||
|
~JitBlock()
|
||||||
|
{
|
||||||
|
delete[] Data;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 StartAddr;
|
||||||
|
u32 PseudoPhysicalAddr;
|
||||||
|
|
||||||
|
u32 NumInstrs;
|
||||||
|
u32 NumAddresses;
|
||||||
|
|
||||||
|
JitBlockEntry EntryPoint;
|
||||||
|
|
||||||
|
u32* Instrs()
|
||||||
|
{ return Data; }
|
||||||
|
u32* AddressRanges()
|
||||||
|
{ return Data + NumInstrs; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
/*
|
||||||
|
0..<NumInstrs - the instructions of the block
|
||||||
|
NumInstrs..<(NumLinks + NumInstrs) - pseudo physical addresses where the block is located
|
||||||
|
(atleast one, the pseudo physical address of the block)
|
||||||
|
*/
|
||||||
|
u32* Data;
|
||||||
|
};
|
||||||
|
|
||||||
|
// size should be 16 bytes because I'm to lazy to use mul and whatnot
|
||||||
|
struct __attribute__((packed)) AddressRange
|
||||||
|
{
|
||||||
|
TinyVector<JitBlock*> Blocks;
|
||||||
|
u16 TimesInvalidated;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern AddressRange CodeRanges[ExeMemSpaceSize / 256];
|
||||||
|
|
||||||
|
typedef void (*InterpreterFunc)(ARM* cpu);
|
||||||
|
extern InterpreterFunc InterpretARM[];
|
||||||
|
extern InterpreterFunc InterpretTHUMB[];
|
||||||
|
|
||||||
|
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -60,15 +60,46 @@ public:
|
|||||||
assert("Welp!");
|
assert("Welp!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PutLiteral(int reg, u32 val)
|
||||||
|
{
|
||||||
|
LiteralsLoaded |= (1 << reg);
|
||||||
|
LiteralValues[reg] = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
void UnloadLiteral(int reg)
|
||||||
|
{
|
||||||
|
LiteralsLoaded &= ~(1 << reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsLiteral(int reg)
|
||||||
|
{
|
||||||
|
return LiteralsLoaded & (1 << reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PrepareExit()
|
||||||
|
{
|
||||||
|
BitSet16 dirtyRegs(DirtyRegs);
|
||||||
|
for (int reg : dirtyRegs)
|
||||||
|
Compiler->SaveReg(reg, Mapping[reg]);
|
||||||
|
}
|
||||||
|
|
||||||
void Flush()
|
void Flush()
|
||||||
{
|
{
|
||||||
BitSet16 loadedSet(LoadedRegs);
|
BitSet16 loadedSet(LoadedRegs);
|
||||||
for (int reg : loadedSet)
|
for (int reg : loadedSet)
|
||||||
UnloadRegister(reg);
|
UnloadRegister(reg);
|
||||||
|
LiteralsLoaded = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Prepare(bool thumb, int i)
|
void Prepare(bool thumb, int i)
|
||||||
{
|
{
|
||||||
|
if (LoadedRegs & (1 << 15))
|
||||||
|
UnloadRegister(15);
|
||||||
|
|
||||||
|
BitSet16 invalidedLiterals(LiteralsLoaded & Instrs[i].Info.DstRegs);
|
||||||
|
for (int reg : invalidedLiterals)
|
||||||
|
UnloadLiteral(reg);
|
||||||
|
|
||||||
u16 futureNeeded = 0;
|
u16 futureNeeded = 0;
|
||||||
int ranking[16];
|
int ranking[16];
|
||||||
for (int j = 0; j < 16; j++)
|
for (int j = 0; j < 16; j++)
|
||||||
@ -86,7 +117,7 @@ public:
|
|||||||
for (int reg : neverNeededAgain)
|
for (int reg : neverNeededAgain)
|
||||||
UnloadRegister(reg);
|
UnloadRegister(reg);
|
||||||
|
|
||||||
FetchedInstr Instr = Instrs[i];
|
FetchedInstr Instr = Instrs[i];
|
||||||
u16 necessaryRegs = (Instr.Info.SrcRegs & ~(1 << 15)) | Instr.Info.DstRegs;
|
u16 necessaryRegs = (Instr.Info.SrcRegs & ~(1 << 15)) | Instr.Info.DstRegs;
|
||||||
BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
|
BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
|
||||||
if (needToBeLoaded != BitSet16(0))
|
if (needToBeLoaded != BitSet16(0))
|
||||||
@ -125,6 +156,9 @@ public:
|
|||||||
static const int NativeRegsAvailable;
|
static const int NativeRegsAvailable;
|
||||||
|
|
||||||
Reg Mapping[16];
|
Reg Mapping[16];
|
||||||
|
u32 LiteralValues[16];
|
||||||
|
|
||||||
|
u16 LiteralsLoaded = 0;
|
||||||
u32 NativeRegsUsed = 0;
|
u32 NativeRegsUsed = 0;
|
||||||
u16 LoadedRegs = 0;
|
u16 LoadedRegs = 0;
|
||||||
u16 DirtyRegs = 0;
|
u16 DirtyRegs = 0;
|
||||||
|
@ -213,7 +213,13 @@ void Compiler::A_Comp_MovOp()
|
|||||||
MOV(32, rd, op2);
|
MOV(32, rd, op2);
|
||||||
|
|
||||||
if (((CurInstr.Instr >> 21) & 0xF) == 0xF)
|
if (((CurInstr.Instr >> 21) & 0xF) == 0xF)
|
||||||
|
{
|
||||||
NOT(32, rd);
|
NOT(32, rd);
|
||||||
|
if (op2.IsImm() && CurInstr.Cond() == 0xE)
|
||||||
|
RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm32());
|
||||||
|
}
|
||||||
|
else if (op2.IsImm() && CurInstr.Cond() == 0xE)
|
||||||
|
RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm32());
|
||||||
|
|
||||||
if (S)
|
if (S)
|
||||||
{
|
{
|
||||||
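The PutLiteral calls above feed the "loads/stores from constant addresses" optimisation from the commit message: when a MOV/MVN with an immediate operand executes under an always-true condition, the register cache remembers the constant. A hedged sketch of that bookkeeping, with simplified names:

#include <cstdint>

// Simplified register-cache literal tracking: one bit per guest register
// says "this register currently holds a known constant", and the value
// array holds that constant.
struct LiteralCache
{
    uint32_t Values[16];
    uint16_t Loaded = 0;

    void Put(int reg, uint32_t val) { Loaded |= 1u << reg; Values[reg] = val; }
    void Unload(int reg)            { Loaded &= ~(1u << reg); }
    bool Known(int reg) const       { return Loaded & (1u << reg); }
};

// On "mov rD, #imm" compiled under an always-true condition, record the
// constant; any later instruction that writes rD must call Unload(rD).
// A later "ldr rX, [rD, #off]" can then be compiled as an access to the
// known address Values[rD] + off instead of an indirect one.
static void OnMovImm(LiteralCache& cache, int rd, uint32_t imm, bool conditionAlways)
{
    if (conditionAlways)
        cache.Put(rd, imm);
    else
        cache.Unload(rd);          // value only known on one path: forget it
}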
@ -564,7 +570,13 @@ void Compiler::T_Comp_AddSub_()
|
|||||||
|
|
||||||
Comp_AddCycles_C();
|
Comp_AddCycles_C();
|
||||||
|
|
||||||
if (op & 1)
|
// special case for thumb mov being alias to add rd, rn, #0
|
||||||
|
if (CurInstr.SetFlags == 0 && rn.IsImm() && rn.Imm32() == 0)
|
||||||
|
{
|
||||||
|
if (rd != rs)
|
||||||
|
MOV(32, rd, rs);
|
||||||
|
}
|
||||||
|
else if (op & 1)
|
||||||
Comp_ArithTriOp(&Compiler::SUB, rd, rs, rn, false, opSetsFlags|opInvertCarry|opRetriveCV);
|
Comp_ArithTriOp(&Compiler::SUB, rd, rs, rn, false, opSetsFlags|opInvertCarry|opRetriveCV);
|
||||||
else
|
else
|
||||||
Comp_ArithTriOp(&Compiler::ADD, rd, rs, rn, false, opSetsFlags|opSymmetric|opRetriveCV);
|
Comp_ArithTriOp(&Compiler::ADD, rd, rs, rn, false, opSetsFlags|opSymmetric|opRetriveCV);
|
||||||
@ -614,7 +626,7 @@ void Compiler::T_Comp_ALU()
|
|||||||
u32 op = (CurInstr.Instr >> 6) & 0xF;
|
u32 op = (CurInstr.Instr >> 6) & 0xF;
|
||||||
|
|
||||||
if ((op >= 0x2 && op < 0x4) || op == 0x7)
|
if ((op >= 0x2 && op < 0x4) || op == 0x7)
|
||||||
Comp_AddCycles_CI(1);
|
Comp_AddCycles_CI(1); // shift by reg
|
||||||
else
|
else
|
||||||
Comp_AddCycles_C();
|
Comp_AddCycles_C();
|
||||||
|
|
||||||
|
@@ -16,9 +16,6 @@ int squeezePointer(T* ptr)
 void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
 {
     // we can simplify constant branches by a lot
-    // it's not completely safe to assume stuff like, which instructions to preload
-    // we'll see how it works out
 
     IrregularCycles = true;
 
     u32 newPC;
@@ -39,18 +36,12 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
     {
         ARMv5* cpu9 = (ARMv5*)CurCPU;
 
-        u32 oldregion = R15 >> 24;
-        u32 newregion = addr >> 24;
-
         u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
         u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
         cpu9->RegionCodeCycles = regionCodeCycles;
 
-        MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
-
-        bool setupRegion = newregion != oldregion;
-        if (setupRegion)
-            cpu9->SetupCodeMem(addr);
+        if (Exit)
+            MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
 
         if (addr & 0x1)
         {
@@ -83,12 +74,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
             cycles += cpu9->CodeCycles;
         }
 
-        MOV(64, MDisp(RCPU, offsetof(ARM, CodeMem.Mem)), Imm32(squeezePointer(cpu9->CodeMem.Mem)));
-        MOV(32, MDisp(RCPU, offsetof(ARM, CodeMem.Mask)), Imm32(cpu9->CodeMem.Mask));
-
         cpu9->RegionCodeCycles = compileTimeCodeCycles;
-        if (setupRegion)
-            cpu9->SetupCodeMem(R15);
     }
     else
     {
@@ -100,8 +86,11 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
         cpu7->CodeRegion = codeRegion;
         cpu7->CodeCycles = codeCycles;
 
-        MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
-        MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));
+        if (Exit)
+        {
+            MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
+            MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));
+        }
 
         if (addr & 0x1)
         {
@@ -133,7 +122,8 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
             cpu7->CodeCycles = addr >> 15;
         }
 
-    MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
+    if (Exit)
+        MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
 
     if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
         ConstantCycles += cycles;
     else
@@ -219,10 +209,23 @@ void Compiler::T_Comp_BCOND()
     s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
     Comp_JumpTo(R15 + offset + 1, true);
 
+    Comp_SpecialBranchBehaviour();
+
     FixupBranch skipFailed = J();
     SetJumpTarget(skipExecute);
 
+    if (CurInstr.BranchFlags & branch_FollowCondTaken)
+    {
+        RegCache.PrepareExit();
+        SaveCPSR(false);
+
+        MOV(32, R(RAX), Imm32(ConstantCycles));
+        ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
+        RET();
+    }
+
     Comp_AddCycles_C(true);
     SetJumpTarget(skipFailed);
 }
 
 void Compiler::T_Comp_B()
@@ -72,12 +72,15 @@ Compiler::Compiler()
     for (int i = 0; i < 3; i++)
     {
         for (int j = 0; j < 2; j++)
-        {
             MemoryFuncs9[i][j] = Gen_MemoryRoutine9(j, 8 << i);
-            MemoryFuncs7[i][j][0] = Gen_MemoryRoutine7(j, false, 8 << i);
-            MemoryFuncs7[i][j][1] = Gen_MemoryRoutine7(j, true, 8 << i);
-        }
     }
+    MemoryFuncs7[0][0] = (void*)NDS::ARM7Read8;
+    MemoryFuncs7[0][1] = (void*)NDS::ARM7Write8;
+    MemoryFuncs7[1][0] = (void*)NDS::ARM7Read16;
+    MemoryFuncs7[1][1] = (void*)NDS::ARM7Write16;
+    MemoryFuncs7[2][0] = (void*)NDS::ARM7Read32;
+    MemoryFuncs7[2][1] = (void*)NDS::ARM7Write32;
 
     for (int i = 0; i < 2; i++)
         for (int j = 0; j < 2; j++)
         {
@@ -179,12 +182,13 @@ void Compiler::LoadCPSR()
     MOV(32, R(RCPSR), MDisp(RCPU, offsetof(ARM, CPSR)));
 }
 
-void Compiler::SaveCPSR()
+void Compiler::SaveCPSR(bool flagClean)
 {
     if (CPSRDirty)
     {
         MOV(32, MDisp(RCPU, offsetof(ARM, CPSR)), R(RCPSR));
-        CPSRDirty = false;
+        if (flagClean)
+            CPSRDirty = false;
     }
 }
 
@@ -204,6 +208,9 @@ void Compiler::SaveReg(int reg, X64Reg nativeReg)
 // invalidates RSCRATCH and RSCRATCH3
 Gen::FixupBranch Compiler::CheckCondition(u32 cond)
 {
+    // hack, ldm/stm can get really big TODO: make this better
+    bool ldmStm = !Thumb &&
+        (CurInstr.Info.Kind == ARMInstrInfo::ak_LDM || CurInstr.Info.Kind == ARMInstrInfo::ak_STM);
     if (cond >= 0x8)
     {
         static_assert(RSCRATCH3 == ECX, "RSCRATCH has to be equal to ECX!");
@@ -213,14 +220,14 @@ Gen::FixupBranch Compiler::CheckCondition(u32 cond)
         SHL(32, R(RSCRATCH), R(RSCRATCH3));
         TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
 
-        return J_CC(CC_Z);
+        return J_CC(CC_Z, ldmStm);
     }
     else
     {
         // could have used a LUT, but then where would be the fun?
         TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
 
-        return J_CC(cond & 1 ? CC_NZ : CC_Z);
+        return J_CC(cond & 1 ? CC_NZ : CC_Z, ldmStm);
     }
 }
 
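Not part of the patch, but since the expression survives the hunk unchanged it is worth unpacking: for cond < 8 the TEST immediate selects the single CPSR flag bit that the condition pair reads (EQ/NE test Z, CS/CC test C, MI/PL test N, VS/VC test V). A stand-alone sketch that simply re-evaluates the bit arithmetic:

// Illustration only: which CPSR bit (N=31, Z=30, C=29, V=28) the TEST above picks.
constexpr int flagBit(unsigned cond) // cond < 8
{
    return 28 + (((~(cond >> 1) & 1) << 1) | ((cond >> 2 & 1) ^ (cond >> 1 & 1)));
}

static_assert(flagBit(0) == 30, "EQ/NE test Z");
static_assert(flagBit(2) == 29, "CS/CC test C");
static_assert(flagBit(4) == 31, "MI/PL test N");
static_assert(flagBit(6) == 28, "VS/VC test V");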
@@ -354,25 +361,34 @@ void Compiler::Reset()
     SetCodePtr(ResetStart);
 }
 
-CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount)
+void Compiler::Comp_SpecialBranchBehaviour()
+{
+    if (CurInstr.BranchFlags & branch_IdleBranch)
+        OR(32, MDisp(RCPU, offsetof(ARM, Halted)), Imm8(0x20));
+
+    if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
+    {
+        RegCache.PrepareExit();
+        SaveCPSR(false);
+
+        MOV(32, R(RAX), Imm32(ConstantCycles));
+        ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
+        RET();
+    }
+}
+
+JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
 {
     if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess...
-        InvalidateBlockCache();
+        ResetBlockCache();
 
     ConstantCycles = 0;
-    Thumb = cpu->CPSR & 0x20;
+    Thumb = thumb;
     Num = cpu->Num;
-    CodeRegion = cpu->CodeRegion;
+    CodeRegion = instrs[0].Addr >> 24;
     CurCPU = cpu;
 
-    CompiledBlock res = (CompiledBlock)GetWritableCodePtr();
-
-    if (!(Num == 0
-        ? IsMapped<0>(instrs[0].Addr - (Thumb ? 2 : 4))
-        : IsMapped<1>(instrs[0].Addr - (Thumb ? 2 : 4))))
-    {
-        printf("Trying to compile a block in unmapped memory\n");
-    }
+    JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
 
     ABI_PushRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
 
@@ -380,7 +396,6 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
 
     LoadCPSR();
 
-    // TODO: this is ugly as a whole, do better
     RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
 
     for (int i = 0; i < instrsCount; i++)
@@ -388,21 +403,25 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
         CurInstr = instrs[i];
         R15 = CurInstr.Addr + (Thumb ? 4 : 8);
 
+        Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
+
         CompileFunc comp = Thumb
             ? T_Comp[CurInstr.Info.Kind]
             : A_Comp[CurInstr.Info.Kind];
 
         bool isConditional = Thumb ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND : CurInstr.Cond() < 0xE;
-        if (comp == NULL || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
+        if (comp == NULL || (CurInstr.BranchFlags & branch_FollowCondTaken) || (i == instrsCount - 1 && (!CurInstr.Info.Branches() || isConditional)))
         {
             MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
-            MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
-            MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
 
             if (comp == NULL)
+            {
+                MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
+                MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
+
                 SaveCPSR();
+            }
         }
 
         if (comp != NULL)
             RegCache.Prepare(Thumb, i);
         else
@@ -410,12 +429,11 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
 
         if (Thumb)
        {
-            u32 icode = (CurInstr.Instr >> 6) & 0x3FF;
             if (comp == NULL)
             {
                 MOV(64, R(ABI_PARAM1), R(RCPU));
 
-                ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
+                ABI_CallFunction(InterpretTHUMB[CurInstr.Info.Kind]);
             }
             else
                 (this->*comp)();
@@ -434,7 +452,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
             }
         }
         else if (cond == 0xF)
+        {
             Comp_AddCycles_C();
+        }
         else
         {
             IrregularCycles = false;
@@ -443,25 +463,36 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
             if (cond < 0xE)
                 skipExecute = CheckCondition(cond);
 
-            u32 icode = ((CurInstr.Instr >> 4) & 0xF) | ((CurInstr.Instr >> 16) & 0xFF0);
             if (comp == NULL)
             {
                 MOV(64, R(ABI_PARAM1), R(RCPU));
 
-                ABI_CallFunction(ARMInterpreter::ARMInstrTable[icode]);
+                ABI_CallFunction(InterpretARM[CurInstr.Info.Kind]);
             }
             else
                 (this->*comp)();
 
+            Comp_SpecialBranchBehaviour();
+
             if (CurInstr.Cond() < 0xE)
             {
-                if (IrregularCycles)
+                if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
                 {
                     FixupBranch skipFailed = J();
                     SetJumpTarget(skipExecute);
 
                     Comp_AddCycles_C(true);
 
+                    if (CurInstr.BranchFlags & branch_FollowCondTaken)
+                    {
+                        RegCache.PrepareExit();
+                        SaveCPSR(false);
+
+                        MOV(32, R(RAX), Imm32(ConstantCycles));
+                        ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
+                        RET();
+                    }
+
                     SetJumpTarget(skipFailed);
                 }
                 else
@@ -483,6 +514,12 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
     ABI_PopRegistersAndAdjustStack(BitSet32(ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS & ~BitSet32({RSP})), 8);
     RET();
 
+    /*FILE* codeout = fopen("codeout", "a");
+    fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
+    fwrite((u8*)res, GetWritableCodePtr() - (u8*)res, 1, codeout);
+
+    fclose(codeout);*/
+
     return res;
 }
 
@@ -528,4 +565,89 @@ void Compiler::Comp_AddCycles_CI(Gen::X64Reg i, int add)
     }
 }
 
+void Compiler::Comp_AddCycles_CDI()
+{
+    if (Num == 0)
+        Comp_AddCycles_CD();
+    else
+    {
+        IrregularCycles = true;
+
+        s32 cycles;
+
+        s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
+        s32 numD = CurInstr.DataCycles;
+
+        if (CurInstr.DataRegion == 0x02) // mainRAM
+        {
+            if (CodeRegion == 0x02)
+                cycles = numC + numD;
+            else
+            {
+                numC++;
+                cycles = std::max(numC + numD - 3, std::max(numC, numD));
+            }
+        }
+        else if (CodeRegion == 0x02)
+        {
+            numD++;
+            cycles = std::max(numC + numD - 3, std::max(numC, numD));
+        }
+        else
+        {
+            cycles = numC + numD + 1;
+        }
+
+        printf("%x: %d %d cycles cdi (%d)\n", CurInstr.Instr, Num, CurInstr.DataCycles, cycles);
+
+        if (!Thumb && CurInstr.Cond() < 0xE)
+            ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+        else
+            ConstantCycles += cycles;
+    }
+}
+
+void Compiler::Comp_AddCycles_CD()
+{
+    u32 cycles = 0;
+    if (Num == 0)
+    {
+        s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
+        s32 numD = CurInstr.DataCycles;
+
+        //if (DataRegion != CodeRegion)
+            cycles = std::max(numC + numD - 6, std::max(numC, numD));
+
+        IrregularCycles = cycles != numC;
+    }
+    else
+    {
+        s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
+        s32 numD = CurInstr.DataCycles;
+
+        if (CurInstr.DataRegion == 0x02)
+        {
+            if (CodeRegion == 0x02)
+                cycles += numC + numD;
+            else
+                cycles += std::max(numC + numD - 3, std::max(numC, numD));
+        }
+        else if (CodeRegion == 0x02)
+        {
+            cycles += std::max(numC + numD - 3, std::max(numC, numD));
+        }
+        else
+        {
+            cycles += numC + numD;
+        }
+
+        IrregularCycles = true;
+    }
+
+    if (!Thumb && CurInstr.Cond() < 0xE)
+        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
+    else
+        ConstantCycles += cycles;
+}
+
 }
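The max() expression appears in both helpers above, so a compact model of it may help (my sketch, not from the patch; the reading that the code fetch and the data access overlap except for a fixed penalty is an assumption drawn from the formula itself):

// Illustration only: merged cost of a code fetch (numC) and a data access (numD).
#include <algorithm>
#include <cstdio>

int mergedCycles(int numC, int numD, int overlap) // overlap is 3 on the ARM7 paths above
{
    return std::max(numC + numD - overlap, std::max(numC, numD));
}

int main()
{
    // A load with 4 code cycles and 3 data cycles is charged 4 cycles instead of the
    // serial 7; with no possible overlap the formula degrades to numC + numD.
    printf("%d %d\n", mergedCycles(4, 3, 3), mergedCycles(4, 3, 0)); // prints "4 7"
    return 0;
}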
@@ -4,6 +4,7 @@
 #include "../dolphin/x64Emitter.h"
 
 #include "../ARMJIT.h"
+#include "../ARMJIT_Internal.h"
 #include "../ARMJIT_RegisterCache.h"
 
 namespace ARMJIT
@@ -16,6 +17,32 @@ const Gen::X64Reg RSCRATCH = Gen::EAX;
 const Gen::X64Reg RSCRATCH2 = Gen::EDX;
 const Gen::X64Reg RSCRATCH3 = Gen::ECX;
 
+struct ComplexOperand
+{
+    ComplexOperand()
+    {}
+
+    ComplexOperand(u32 imm)
+        : IsImm(true), Imm(imm)
+    {}
+    ComplexOperand(int reg, int op, int amount)
+        : IsImm(false)
+    {
+        Reg.Reg = reg;
+        Reg.Op = op;
+        Reg.Amount = amount;
+    }
+
+    bool IsImm;
+    union
+    {
+        struct
+        {
+            int Reg, Op, Amount;
+        } Reg;
+        u32 Imm;
+    };
+};
+
 class Compiler : public Gen::XEmitter
 {
@@ -24,7 +51,7 @@ public:
 
     void Reset();
 
-    CompiledBlock CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount);
+    JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
 
     void LoadReg(int reg, Gen::X64Reg nativeReg);
     void SaveReg(int reg, Gen::X64Reg nativeReg);
@@ -39,6 +66,8 @@ public:
     void Comp_AddCycles_C(bool forceNonConstant = false);
     void Comp_AddCycles_CI(u32 i);
     void Comp_AddCycles_CI(Gen::X64Reg i, int add);
+    void Comp_AddCycles_CDI();
+    void Comp_AddCycles_CD();
 
     enum
     {
@@ -92,8 +121,17 @@ public:
     void T_Comp_BL_LONG_2();
     void T_Comp_BL_Merged();
 
-    void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
+    enum
+    {
+        memop_Writeback = 1 << 0,
+        memop_Post = 1 << 1,
+        memop_SignExtend = 1 << 2,
+        memop_Store = 1 << 3,
+        memop_SubtractOffset = 1 << 4
+    };
+    void Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags);
     s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
+    void Comp_MemLoadLiteral(int size, int rd, u32 addr);
 
     void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
         Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
@@ -105,8 +143,9 @@ public:
 
     void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
 
+    void Comp_SpecialBranchBehaviour();
+
     void* Gen_MemoryRoutine9(bool store, int size);
-    void* Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size);
 
     void* Gen_MemoryRoutineSeq9(bool store, bool preinc);
     void* Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM);
@@ -117,10 +156,9 @@ public:
     Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
 
     Gen::OpArg A_Comp_GetALUOp2(bool S, bool& carryUsed);
-    Gen::OpArg A_Comp_GetMemWBOffset();
 
     void LoadCPSR();
-    void SaveCPSR();
+    void SaveCPSR(bool flagClean = true);
 
     bool FlagsNZRequired()
     { return CurInstr.SetFlags & 0xC; }
@@ -139,10 +177,11 @@ public:
     u8* ResetStart;
     u32 CodeMemSize;
 
+    bool Exit;
     bool IrregularCycles;
 
     void* MemoryFuncs9[3][2];
-    void* MemoryFuncs7[3][2][2];
+    void* MemoryFuncs7[3][2];
 
     void* MemoryFuncsSeq9[2][2];
     void* MemoryFuncsSeq7[2][2][2];
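To make the new interface above more concrete, here is a small usage sketch (mine, not from the patch). The struct and enum are local copies of the declarations above, duplicated only so the snippet builds on its own; the register numbers and instructions in the comments are invented examples.

#include <cstdint>
#include <cstdio>

struct ComplexOperand // local copy of the header's ComplexOperand
{
    ComplexOperand() {}
    ComplexOperand(uint32_t imm) : IsImm(true), Imm(imm) {}
    ComplexOperand(int reg, int op, int amount) : IsImm(false)
    {
        Reg.Reg = reg; Reg.Op = op; Reg.Amount = amount;
    }
    bool IsImm;
    union { struct { int Reg, Op, Amount; } Reg; uint32_t Imm; };
};

enum // local copy of the memop_* flags
{
    memop_Writeback = 1 << 0, memop_Post = 1 << 1, memop_SignExtend = 1 << 2,
    memop_Store = 1 << 3, memop_SubtractOffset = 1 << 4
};

int main()
{
    // LDR  r0, [r1, #8]   -> Comp_MemAccess(0, 1, ComplexOperand(8), 32, 0)
    ComplexOperand ldrOffset(8u);
    // STRB r2, [r3], #-1  -> post-indexed store with a subtracted immediate
    int strbFlags = memop_Store | memop_Post | memop_SubtractOffset;
    // LDRSH r4, [r5, r6]! -> register offset (LSL #0), sign-extended, with writeback
    ComplexOperand ldrshOffset(6, 0, 0);
    int ldrshFlags = memop_SignExtend | memop_Writeback;

    printf("imm=%u strbFlags=%d offsetReg=r%d ldrshFlags=%d\n",
           ldrOffset.Imm, strbFlags, ldrshOffset.Reg.Reg, ldrshFlags);
    return 0;
}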
@@ -27,51 +27,7 @@ int squeezePointer(T* ptr)
 /*
     address - ABI_PARAM1 (a.k.a. ECX = RSCRATCH3 on Windows)
     store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows)
-    code cycles - ABI_PARAM3
 */
 
-#define CALC_CYCLES_9(numC, numD, scratch) \
-    LEA(32, scratch, MComplex(numD, numC, SCALE_1, -6)); \
-    CMP(32, R(numC), R(numD)); \
-    CMOVcc(32, numD, R(numC), CC_G); \
-    CMP(32, R(numD), R(scratch)); \
-    CMOVcc(32, scratch, R(numD), CC_G); \
-    ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch));
-#define CALC_CYCLES_7_DATA_MAIN_RAM(numC, numD, scratch) \
-    if (codeMainRAM) \
-    { \
-        LEA(32, scratch, MRegSum(numD, numC)); \
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
-    } \
-    else \
-    { \
-        if (!store) \
-            ADD(32, R(numC), Imm8(1)); \
-        LEA(32, scratch, MComplex(numD, numC, SCALE_1, -3)); \
-        CMP(32, R(numD), R(numC)); \
-        CMOVcc(32, numC, R(numD), CC_G); \
-        CMP(32, R(numC), R(scratch)); \
-        CMOVcc(32, scratch, R(numC), CC_G); \
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
-    }
-#define CALC_CYCLES_7_DATA_NON_MAIN_RAM(numC, numD, scratch) \
-    if (codeMainRAM) \
-    { \
-        if (!store) \
-            ADD(32, R(numD), Imm8(1)); \
-        LEA(32, scratch, MComplex(numD, numC, SCALE_1, -3)); \
-        CMP(32, R(numD), R(numC)); \
-        CMOVcc(32, numC, R(numD), CC_G); \
-        CMP(32, R(numC), R(scratch)); \
-        CMOVcc(32, scratch, R(numC), CC_G); \
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
-    } \
-    else \
-    { \
-        LEA(32, scratch, MComplex(numD, numC, SCALE_1, store ? 0 : 1)); \
-        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
-    }
 
 void* Compiler::Gen_MemoryRoutine9(bool store, int size)
 {
     u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
@@ -86,12 +42,6 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
     CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
     FixupBranch insideITCM = J_CC(CC_B);
 
-    // cycle counting!
-    MOV(32, R(ABI_PARAM4), R(ABI_PARAM1));
-    SHR(32, R(ABI_PARAM4), Imm8(12));
-    MOVZX(32, 8, ABI_PARAM4, MComplex(RCPU, ABI_PARAM4, SCALE_4, offsetof(ARMv5, MemTimings) + (size == 32 ? 2 : 1)));
-    CALC_CYCLES_9(ABI_PARAM3, ABI_PARAM4, RSCRATCH)
-
     if (store)
     {
         if (size > 8)
@@ -127,7 +77,6 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
     }
 
     SetJumpTarget(insideDTCM);
-    ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM3));
     AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
     if (store)
         MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2));
@@ -146,16 +95,22 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
     RET();
 
     SetJumpTarget(insideITCM);
-    ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM3));
     MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX
     AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask));
     if (store)
     {
         MOV(size, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM2));
-        XOR(32, R(RSCRATCH), R(RSCRATCH));
-        MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM)), R(RSCRATCH));
-        if (size == 32)
-            MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), R(RSCRATCH));
+
+        // if CodeRanges[pseudoPhysical/256].Blocks.Length > 0 we're writing into code!
+        static_assert(sizeof(AddressRange) == 16);
+        LEA(32, ABI_PARAM1, MDisp(ABI_PARAM3, ExeMemRegionOffsets[exeMem_ITCM]));
+        MOV(32, R(RSCRATCH), R(ABI_PARAM1));
+        SHR(32, R(RSCRATCH), Imm8(8));
+        SHL(32, R(RSCRATCH), Imm8(4));
+        CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
+        FixupBranch noCode = J_CC(CC_Z);
+        JMP((u8*)InvalidateByAddr, true);
+        SetJumpTarget(noCode);
     }
     else
     {
@@ -176,83 +131,6 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
     return res;
 }
 
-void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size)
-{
-    u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
-    AlignCode4();
-    void* res = GetWritableCodePtr();
-
-    MOV(32, R(RSCRATCH), R(ABI_PARAM1));
-    SHR(32, R(RSCRATCH), Imm8(15));
-    MOVZX(32, 8, ABI_PARAM4, MScaled(RSCRATCH, SCALE_4, (size == 32 ? 2 : 0) + squeezePointer(NDS::ARM7MemTimings)));
-
-    MOV(32, R(RSCRATCH), R(ABI_PARAM1));
-    AND(32, R(RSCRATCH), Imm32(0xFF000000));
-    CMP(32, R(RSCRATCH), Imm32(0x02000000));
-    FixupBranch outsideMainRAM = J_CC(CC_NE);
-    CALC_CYCLES_7_DATA_MAIN_RAM(ABI_PARAM3, ABI_PARAM4, RSCRATCH)
-    MOV(32, R(ABI_PARAM3), R(ABI_PARAM1));
-    AND(32, R(ABI_PARAM3), Imm32((MAIN_RAM_SIZE - 1) & addressMask));
-    if (store)
-    {
-        MOV(size, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM)), R(ABI_PARAM2));
-        XOR(32, R(RSCRATCH), R(RSCRATCH));
-        MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM)), R(RSCRATCH));
-        if (size == 32)
-            MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM) + 8), R(RSCRATCH));
-    }
-    else
-    {
-        MOVZX(32, size, RSCRATCH, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM)));
-        if (size == 32)
-        {
-            if (ABI_PARAM1 != ECX)
-                MOV(32, R(ECX), R(ABI_PARAM1));
-            AND(32, R(ECX), Imm8(3));
-            SHL(32, R(ECX), Imm8(3));
-            ROR_(32, R(RSCRATCH), R(ECX));
-        }
-    }
-    RET();
-
-    SetJumpTarget(outsideMainRAM);
-    CALC_CYCLES_7_DATA_NON_MAIN_RAM(ABI_PARAM3, ABI_PARAM4, RSCRATCH)
-    if (store)
-    {
-        if (size > 8)
-            AND(32, R(ABI_PARAM1), Imm32(addressMask));
-        switch (size)
-        {
-        case 32: JMP((u8*)NDS::ARM7Write32, true); break;
-        case 16: JMP((u8*)NDS::ARM7Write16, true); break;
-        case 8: JMP((u8*)NDS::ARM7Write8, true); break;
-        }
-    }
-    else
-    {
-        if (size == 32)
-        {
-            ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8);
-            AND(32, R(ABI_PARAM1), Imm32(addressMask));
-            ABI_CallFunction(NDS::ARM7Read32);
-            ABI_PopRegistersAndAdjustStack({ECX}, 8);
-            AND(32, R(ECX), Imm8(3));
-            SHL(32, R(ECX), Imm8(3));
-            ROR_(32, R(RSCRATCH), R(ECX));
-            RET();
-        }
-        else if (size == 16)
-        {
-            AND(32, R(ABI_PARAM1), Imm32(addressMask));
-            JMP((u8*)NDS::ARM7Read16, true);
-        }
-        else
-            JMP((u8*)NDS::ARM7Read8, true);
-    }
-
-    return res;
-}
-
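My reading of the new ITCM store path above, expressed as plain C so the bit shifts are easier to follow (a sketch under the assumption that the SHR 8 / SHL 4 pair is simply index * sizeof(AddressRange); the real CodeRanges, AddressRange and InvalidateByAddr live in the JIT internals, the stand-ins below exist only to make the fragment compile):

#include <cstdint>

struct AddressRange { struct { int Length; } Blocks; char pad[12]; }; // 16 bytes, as static_assert'ed above

inline void checkWriteForCode(AddressRange* codeRanges, uint32_t pseudoPhysical,
                              void (*invalidateByAddr)(uint32_t))
{
    // One AddressRange covers 256 bytes; SHR 8 then SHL 4 in the emitted code
    // is (pseudoPhysical >> 8) * sizeof(AddressRange).
    if (codeRanges[pseudoPhysical >> 8].Blocks.Length > 0)
        invalidateByAddr(pseudoPhysical); // some JIT block was compiled from this range
}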
 #define MEMORY_SEQ_WHILE_COND \
     if (!store) \
         MOV(32, currentElement, R(EAX));\
@@ -266,24 +144,13 @@ void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size)
     ABI_PARAM1 address
     ABI_PARAM2 address where registers are stored
     ABI_PARAM3 how many values to read/write
-    ABI_PARAM4 code cycles
 
     Dolphin x64CodeEmitter is my favourite assembler
 */
 void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
 {
-    const u8* zero = GetCodePtr();
-    ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM4));
-    RET();
-
     void* res = (void*)GetWritableCodePtr();
 
-    TEST(32, R(ABI_PARAM3), R(ABI_PARAM3));
-    J_CC(CC_Z, zero);
-
-    PUSH(ABI_PARAM3);
-    PUSH(ABI_PARAM4); // we need you later
-
     const u8* repeat = GetCodePtr();
 
     if (preinc)
@@ -311,12 +178,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
     ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
 
     MEMORY_SEQ_WHILE_COND
-    MOV(32, R(RSCRATCH), R(ABI_PARAM1));
-    SHR(32, R(RSCRATCH), Imm8(12));
-    MOVZX(32, 8, ABI_PARAM2, MComplex(RCPU, RSCRATCH, SCALE_4, 2 + offsetof(ARMv5, MemTimings)));
-    MOVZX(32, 8, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_4, 3 + offsetof(ARMv5, MemTimings)));
-
-    FixupBranch finishIt1 = J();
+    RET();
 
     SetJumpTarget(insideDTCM);
     AND(32, R(RSCRATCH), Imm32(0x3FFF & ~3));
@@ -329,9 +191,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
         MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
 
     MEMORY_SEQ_WHILE_COND
-    MOV(32, R(RSCRATCH), Imm32(1)); // sequential access time
-    MOV(32, R(ABI_PARAM2), Imm32(1)); // non sequential
-    FixupBranch finishIt2 = J();
+    RET();
 
     SetJumpTarget(insideITCM);
     MOV(32, R(RSCRATCH), R(ABI_PARAM1));
@@ -340,31 +200,23 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
     {
         MOV(32, R(ABI_PARAM4), currentElement);
         MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM4));
-        XOR(32, R(ABI_PARAM4), R(ABI_PARAM4));
-        MOV(64, MScaled(RSCRATCH, SCALE_4, squeezePointer(cache.ARM9_ITCM)), R(ABI_PARAM4));
-        MOV(64, MScaled(RSCRATCH, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), R(ABI_PARAM4));
+
+        ADD(32, R(RSCRATCH), Imm32(ExeMemRegionOffsets[exeMem_ITCM]));
+        MOV(32, R(ABI_PARAM4), R(RSCRATCH));
+        SHR(32, R(RSCRATCH), Imm8(8));
+        SHL(32, R(RSCRATCH), Imm8(4));
+        CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
+        FixupBranch noCode = J_CC(CC_Z);
+        ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
+        MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
+        CALL((u8*)InvalidateByAddr);
+        ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
+        SetJumpTarget(noCode);
     }
     else
         MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)));
 
     MEMORY_SEQ_WHILE_COND
-    MOV(32, R(RSCRATCH), Imm32(1));
-    MOV(32, R(ABI_PARAM2), Imm32(1));
-
-    SetJumpTarget(finishIt1);
-    SetJumpTarget(finishIt2);
-
-    POP(ABI_PARAM4);
-    POP(ABI_PARAM3);
-
-    CMP(32, R(ABI_PARAM3), Imm8(1));
-    FixupBranch skipSequential = J_CC(CC_E);
-    SUB(32, R(ABI_PARAM3), Imm8(1));
-    IMUL(32, RSCRATCH, R(ABI_PARAM3));
-    ADD(32, R(ABI_PARAM2), R(RSCRATCH));
-    SetJumpTarget(skipSequential);
-
-    CALC_CYCLES_9(ABI_PARAM4, ABI_PARAM2, RSCRATCH)
     RET();
 
     return res;
@@ -372,18 +224,8 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
 
 void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)
 {
-    const u8* zero = GetCodePtr();
-    ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM4));
-    RET();
-
     void* res = (void*)GetWritableCodePtr();
 
-    TEST(32, R(ABI_PARAM3), R(ABI_PARAM3));
-    J_CC(CC_Z, zero);
-
-    PUSH(ABI_PARAM3);
-    PUSH(ABI_PARAM4); // we need you later
-
     const u8* repeat = GetCodePtr();
 
     if (preinc)
@@ -403,59 +245,227 @@ void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)
     ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
 
     MEMORY_SEQ_WHILE_COND
-    MOV(32, R(RSCRATCH), R(ABI_PARAM1));
-    SHR(32, R(RSCRATCH), Imm8(15));
-    MOVZX(32, 8, ABI_PARAM2, MScaled(RSCRATCH, SCALE_4, 2 + squeezePointer(NDS::ARM7MemTimings)));
-    MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_4, 3 + squeezePointer(NDS::ARM7MemTimings)));
-
-    POP(ABI_PARAM4);
-    POP(ABI_PARAM3);
-
-    // TODO: optimise this
-    CMP(32, R(ABI_PARAM3), Imm8(1));
-    FixupBranch skipSequential = J_CC(CC_E);
-    SUB(32, R(ABI_PARAM3), Imm8(1));
-    IMUL(32, RSCRATCH, R(ABI_PARAM3));
-    ADD(32, R(ABI_PARAM2), R(RSCRATCH));
-    SetJumpTarget(skipSequential);
-
-    MOV(32, R(RSCRATCH), R(ABI_PARAM1));
-    AND(32, R(RSCRATCH), Imm32(0xFF000000));
-    CMP(32, R(RSCRATCH), Imm32(0x02000000));
-    FixupBranch outsideMainRAM = J_CC(CC_NE);
-    CALC_CYCLES_7_DATA_MAIN_RAM(ABI_PARAM4, ABI_PARAM2, RSCRATCH)
-    RET();
-
-    SetJumpTarget(outsideMainRAM);
-    CALC_CYCLES_7_DATA_NON_MAIN_RAM(ABI_PARAM4, ABI_PARAM2, RSCRATCH)
     RET();
 
     return res;
 }
 
-#undef CALC_CYCLES_9
 #undef MEMORY_SEQ_WHILE_COND
 
-void Compiler::Comp_MemAccess(OpArg rd, bool signExtend, bool store, int size)
-{
-    IrregularCycles = true;
-
-    if (store)
-        MOV(32, R(ABI_PARAM2), rd);
-    u32 cycles = Num
-        ? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
-        : (R15 & 0x2 ? 0 : CurInstr.CodeCycles);
-    MOV(32, R(ABI_PARAM3), Imm32(cycles));
-    CALL(Num == 0
-        ? MemoryFuncs9[size >> 4][store]
-        : MemoryFuncs7[size >> 4][store][CodeRegion == 0x02]);
-
-    if (!store)
-    {
-        if (signExtend)
-            MOVSX(32, size, rd.GetSimpleReg(), R(RSCRATCH));
-        else
-            MOVZX(32, size, rd.GetSimpleReg(), R(RSCRATCH));
-    }
-}
+void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
+{
+    u32 val;
+    // make sure arm7 bios is accessible
+    u32 tmpR15 = CurCPU->R[15];
+    CurCPU->R[15] = R15;
+    if (size == 32)
+    {
+        CurCPU->DataRead32(addr & ~0x3, &val);
+        val = ROR(val, (addr & 0x3) << 3);
+    }
+    else if (size == 16)
+        CurCPU->DataRead16(addr & ~0x1, &val);
+    else
+        CurCPU->DataRead8(addr, &val);
+    CurCPU->R[15] = tmpR15;
+
+    MOV(32, MapReg(rd), Imm32(val));
+
+    if (Thumb || CurInstr.Cond() == 0xE)
+        RegCache.PutLiteral(rd, val);
+
+    Comp_AddCycles_CDI();
+}
+
+void fault(u32 a, u32 b)
+{
+    printf("actually not static! %x %x\n", a, b);
+}
+
+void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
+{
+    if (flags & memop_Store)
+    {
+        Comp_AddCycles_CD();
+    }
+    else
+    {
+        Comp_AddCycles_CDI();
+    }
+
+    u32 addressMask = ~0;
+    if (size == 32)
+        addressMask = ~3;
+    if (size == 16)
+        addressMask = ~1;
+
+    if (rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
+    {
+        Comp_MemLoadLiteral(size, rd,
+            R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1));
+    }
+    else
+    {
+        OpArg rdMapped = MapReg(rd);
+        OpArg rnMapped = MapReg(rn);
+
+        bool inlinePreparation = Num == 1;
+        u32 constLocalROR32 = 4;
+
+        void* memoryFunc = Num == 0
+            ? MemoryFuncs9[size >> 4][!!(flags & memop_Store)]
+            : MemoryFuncs7[size >> 4][!!((flags & memop_Store))];
+
+        if ((rd != 15 || (flags & memop_Store)) && op2.IsImm && RegCache.IsLiteral(rn))
+        {
+            u32 addr = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
+
+            /*MOV(32, R(ABI_PARAM1), Imm32(CurInstr.Instr));
+            MOV(32, R(ABI_PARAM1), Imm32(R15));
+            MOV_sum(32, RSCRATCH, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
+            CMP(32, R(RSCRATCH), Imm32(addr));
+            FixupBranch eq = J_CC(CC_E);
+            CALL((void*)fault);
+            SetJumpTarget(eq);*/
+
+            NDS::MemRegion region;
+            region.Mem = NULL;
+            if (Num == 0)
+            {
+                ARMv5* cpu5 = (ARMv5*)CurCPU;
+
+                // stupid dtcm...
+                if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
+                {
+                    region.Mem = cpu5->DTCM;
+                    region.Mask = 0x3FFF;
+                }
+                else
+                {
+                    NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
+                }
+            }
+            else
+                NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
+
+            if (region.Mem != NULL)
+            {
+                void* ptr = &region.Mem[addr & addressMask & region.Mask];
+
+                if (flags & memop_Store)
+                {
+                    MOV(size, M(ptr), MapReg(rd));
+                }
+                else
+                {
+                    if (flags & memop_SignExtend)
+                        MOVSX(32, size, rdMapped.GetSimpleReg(), M(ptr));
+                    else
+                        MOVZX(32, size, rdMapped.GetSimpleReg(), M(ptr));
+
+                    if (size == 32 && addr & ~0x3)
+                    {
+                        ROR_(32, rdMapped, Imm8((addr & 0x3) << 3));
+                    }
+                }
+
+                return;
+            }
+
+            void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
+            if (specialFunc)
+            {
+                memoryFunc = specialFunc;
+                inlinePreparation = true;
+                constLocalROR32 = addr & 0x3;
+            }
+        }
+
+        X64Reg finalAddr = ABI_PARAM1;
+        if (flags & memop_Post)
+        {
+            MOV(32, R(ABI_PARAM1), rnMapped);
+
+            finalAddr = rnMapped.GetSimpleReg();
+        }
+
+        if (op2.IsImm)
+        {
+            MOV_sum(32, finalAddr, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
+        }
+        else
+        {
+            OpArg rm = MapReg(op2.Reg.Reg);
+
+            if (!(flags & memop_SubtractOffset) && rm.IsSimpleReg() && rnMapped.IsSimpleReg()
+                && op2.Reg.Op == 0 && op2.Reg.Amount > 0 && op2.Reg.Amount <= 3)
+            {
+                LEA(32, finalAddr,
+                    MComplex(rnMapped.GetSimpleReg(), rm.GetSimpleReg(), 1 << op2.Reg.Amount, 0));
+            }
+            else
+            {
+                bool throwAway;
+                OpArg offset =
+                    Comp_RegShiftImm(op2.Reg.Op, op2.Reg.Amount, rm, false, throwAway);
+
+                if (flags & memop_SubtractOffset)
+                {
+                    MOV(32, R(finalAddr), rnMapped);
+                    if (!offset.IsZero())
+                        SUB(32, R(finalAddr), offset);
+                }
+                else
+                    MOV_sum(32, finalAddr, rnMapped, offset);
+            }
+        }
+
+        if ((flags & memop_Writeback) && !(flags & memop_Post))
+            MOV(32, rnMapped, R(finalAddr));
+
+        if (flags & memop_Store)
+            MOV(32, R(ABI_PARAM2), rdMapped);
+
+        if (!(flags & memop_Store) && inlinePreparation && constLocalROR32 == 4 && size == 32)
+            MOV(32, rdMapped, R(ABI_PARAM1));
+
+        if (inlinePreparation && size > 8)
+            AND(32, R(ABI_PARAM1), Imm8(addressMask));
+
+        CALL(memoryFunc);
+
+        if (!(flags & memop_Store))
+        {
+            if (inlinePreparation && size == 32)
+            {
+                if (constLocalROR32 == 4)
+                {
+                    static_assert(RSCRATCH3 == ECX);
+                    MOV(32, R(ECX), rdMapped);
+                    AND(32, R(ECX), Imm8(3));
+                    SHL(32, R(ECX), Imm8(3));
+                    ROR_(32, R(RSCRATCH), R(ECX));
+                }
+                else if (constLocalROR32 != 0)
+                    ROR_(32, R(RSCRATCH), Imm8(constLocalROR32 << 3));
+            }
+
+            if (flags & memop_SignExtend)
+                MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+            else
+                MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
+        }
+
+        if (!(flags & memop_Store) && rd == 15)
+        {
+            if (size < 32)
+                printf("!!! LDR <32 bit PC %08X %x\n", R15, CurInstr.Instr);
+            {
+                if (Num == 1)
+                    AND(32, rdMapped, Imm8(0xFE)); // immediate is sign extended
+                Comp_JumpTo(rdMapped.GetSimpleReg());
+            }
+        }
+    }
+}
 
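Both Comp_MemLoadLiteral and the constLocalROR32 handling above reproduce the ARM behaviour that a 32-bit load from a non word-aligned address returns the aligned word rotated right by 8 * (addr & 3). A stand-alone check of that rotation (illustration only, values invented):

#include <cstdint>
#include <cstdio>

static uint32_t ror32(uint32_t v, unsigned r)
{
    r &= 31;
    return r ? (v >> r) | (v << (32 - r)) : v;
}

int main()
{
    uint32_t alignedWord = 0xDDCCBBAA;               // bytes AA BB CC DD at addr & ~3
    uint32_t addr = 0x02000002;                      // misaligned by 2
    uint32_t loaded = ror32(alignedWord, (addr & 0x3) << 3);
    printf("%08X\n", loaded);                        // prints BBAADDCC
    return 0;
}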
@@ -475,16 +485,13 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
 
     s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
 
-    u32 cycles = Num
-        ? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
-        : (R15 & 0x2 ? 0 : CurInstr.CodeCycles);
-
     // we need to make sure that the stack stays aligned to 16 bytes
     u32 stackAlloc = ((regsCount + 1) & ~1) * 8;
 
-    MOV(32, R(ABI_PARAM4), Imm32(cycles));
     if (!store)
     {
+        Comp_AddCycles_CDI();
+
         MOV(32, R(ABI_PARAM3), Imm32(regsCount));
         SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
         MOV(64, R(ABI_PARAM2), R(RSP));
@@ -548,6 +555,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
     }
     else
     {
+        Comp_AddCycles_CD();
+
         if (regsCount & 1)
             PUSH(RSCRATCH);
 
@@ -594,81 +603,45 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
     return offset;
 }
 
-OpArg Compiler::A_Comp_GetMemWBOffset()
+void Compiler::A_Comp_MemWB()
 {
+    bool load = CurInstr.Instr & (1 << 20);
+    bool byte = CurInstr.Instr & (1 << 22);
+    int size = byte ? 8 : 32;
+
+    int flags = 0;
+    if (!load)
+        flags |= memop_Store;
+    if (!(CurInstr.Instr & (1 << 24)))
+        flags |= memop_Post;
+    if (CurInstr.Instr & (1 << 21))
+        flags |= memop_Writeback;
+    if (!(CurInstr.Instr & (1 << 23)))
+        flags |= memop_SubtractOffset;
+
+    ComplexOperand offset;
     if (!(CurInstr.Instr & (1 << 25)))
     {
-        u32 imm = CurInstr.Instr & 0xFFF;
-        return Imm32(imm);
+        offset = ComplexOperand(CurInstr.Instr & 0xFFF);
     }
     else
     {
         int op = (CurInstr.Instr >> 5) & 0x3;
         int amount = (CurInstr.Instr >> 7) & 0x1F;
-        OpArg rm = MapReg(CurInstr.A_Reg(0));
-        bool carryUsed;
-
-        return Comp_RegShiftImm(op, amount, rm, false, carryUsed);
-    }
-}
-
-void Compiler::A_Comp_MemWB()
-{
-    OpArg rn = MapReg(CurInstr.A_Reg(16));
-    OpArg rd = MapReg(CurInstr.A_Reg(12));
-    bool load = CurInstr.Instr & (1 << 20);
-    bool byte = CurInstr.Instr & (1 << 22);
-    int size = byte ? 8 : 32;
-
-    if (CurInstr.Instr & (1 << 24))
-    {
-        OpArg offset = A_Comp_GetMemWBOffset();
-        if (CurInstr.Instr & (1 << 23))
-            MOV_sum(32, ABI_PARAM1, rn, offset);
-        else
-        {
-            MOV(32, R(ABI_PARAM1), rn);
-            SUB(32, R(ABI_PARAM1), offset);
-        }
-
-        if (CurInstr.Instr & (1 << 21))
-            MOV(32, rn, R(ABI_PARAM1));
-    }
-    else
-        MOV(32, R(ABI_PARAM1), rn);
-
-    if (!(CurInstr.Instr & (1 << 24)))
-    {
-        OpArg offset = A_Comp_GetMemWBOffset();
-
-        if (CurInstr.Instr & (1 << 23))
-            ADD(32, rn, offset);
-        else
-            SUB(32, rn, offset);
+        int rm = CurInstr.A_Reg(0);
+
+        offset = ComplexOperand(rm, op, amount);
     }
 
-    Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
-    if (load && CurInstr.A_Reg(12) == 15)
-    {
-        if (byte)
-            printf("!!! LDRB PC %08X\n", R15);
-        else
-        {
-            if (Num == 1)
-                AND(32, rd, Imm8(0xFE)); // immediate is sign extended
-            Comp_JumpTo(rd.GetSimpleReg());
-        }
-    }
+    Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
 }
 
 void Compiler::A_Comp_MemHalf()
 {
-    OpArg rn = MapReg(CurInstr.A_Reg(16));
-    OpArg rd = MapReg(CurInstr.A_Reg(12));
-
-    OpArg offset = CurInstr.Instr & (1 << 22)
-        ? Imm32(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0))
-        : MapReg(CurInstr.A_Reg(0));
+    ComplexOperand offset = CurInstr.Instr & (1 << 22)
+        ? ComplexOperand(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0))
+        : ComplexOperand(CurInstr.A_Reg(0), 0, 0);
 
     int op = (CurInstr.Instr >> 5) & 0x3;
     bool load = CurInstr.Instr & (1 << 20);
@@ -689,49 +662,29 @@ void Compiler::A_Comp_MemHalf()
     if (size == 32 && Num == 1)
         return; // NOP
 
-    if (CurInstr.Instr & (1 << 24))
-    {
-        if (CurInstr.Instr & (1 << 23))
-            MOV_sum(32, ABI_PARAM1, rn, offset);
-        else
-        {
-            MOV(32, R(ABI_PARAM1), rn);
-            SUB(32, R(ABI_PARAM1), offset);
-        }
-
-        if (CurInstr.Instr & (1 << 21))
-            MOV(32, rn, R(ABI_PARAM1));
-    }
-    else
-        MOV(32, R(ABI_PARAM1), rn);
+    int flags = 0;
+    if (signExtend)
+        flags |= memop_SignExtend;
+    if (!load)
+        flags |= memop_Store;
 
     if (!(CurInstr.Instr & (1 << 24)))
-    {
-        if (CurInstr.Instr & (1 << 23))
-            ADD(32, rn, offset);
-        else
-            SUB(32, rn, offset);
-    }
+        flags |= memop_Post;
+    if (!(CurInstr.Instr & (1 << 23)))
+        flags |= memop_SubtractOffset;
+    if (CurInstr.Instr & (1 << 21))
+        flags |= memop_Writeback;
 
-    Comp_MemAccess(rd, signExtend, !load, size);
-
-    if (load && CurInstr.A_Reg(12) == 15)
-        printf("!!! MemHalf op PC %08X\n", R15);;
+    Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
 }
 
 void Compiler::T_Comp_MemReg()
 {
-    OpArg rd = MapReg(CurInstr.T_Reg(0));
-    OpArg rb = MapReg(CurInstr.T_Reg(3));
-    OpArg ro = MapReg(CurInstr.T_Reg(6));
-
     int op = (CurInstr.Instr >> 10) & 0x3;
     bool load = op & 0x2;
     bool byte = op & 0x1;
 
-    MOV_sum(32, ABI_PARAM1, rb, ro);
-
-    Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), ComplexOperand(CurInstr.T_Reg(6), 0, 0),
+        byte ? 8 : 32, load ? 0 : memop_Store);
 }
 
 void Compiler::A_Comp_LDM_STM()
@@ -758,67 +711,55 @@ void Compiler::A_Comp_LDM_STM()
 
 void Compiler::T_Comp_MemImm()
 {
-    OpArg rd = MapReg(CurInstr.T_Reg(0));
-    OpArg rb = MapReg(CurInstr.T_Reg(3));
-
     int op = (CurInstr.Instr >> 11) & 0x3;
     bool load = op & 0x1;
     bool byte = op & 0x2;
     u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
 
-    LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset));
-
-    Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), ComplexOperand(offset),
+        byte ? 8 : 32, load ? 0 : memop_Store);
 }
 
 void Compiler::T_Comp_MemRegHalf()
 {
-    OpArg rd = MapReg(CurInstr.T_Reg(0));
-    OpArg rb = MapReg(CurInstr.T_Reg(3));
-    OpArg ro = MapReg(CurInstr.T_Reg(6));
-
     int op = (CurInstr.Instr >> 10) & 0x3;
     bool load = op != 0;
     int size = op != 1 ? 16 : 8;
     bool signExtend = op & 1;
 
-    MOV_sum(32, ABI_PARAM1, rb, ro);
+    int flags = 0;
+    if (signExtend)
+        flags |= memop_SignExtend;
+    if (!load)
+        flags |= memop_Store;
 
-    Comp_MemAccess(rd, signExtend, !load, size);
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), ComplexOperand(CurInstr.T_Reg(6), 0, 0),
+        size, flags);
 }
 
 void Compiler::T_Comp_MemImmHalf()
 {
-    OpArg rd = MapReg(CurInstr.T_Reg(0));
-    OpArg rb = MapReg(CurInstr.T_Reg(3));
-
     u32 offset = (CurInstr.Instr >> 5) & 0x3E;
     bool load = CurInstr.Instr & (1 << 11);
 
-    LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset));
-
-    Comp_MemAccess(rd, false, !load, 16);
+    Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), ComplexOperand(offset), 16,
+        load ? 0 : memop_Store);
 }
 
 void Compiler::T_Comp_LoadPCRel()
 {
-    OpArg rd = MapReg(CurInstr.T_Reg(8));
     u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
 
-    // hopefully this doesn't break
-    u32 val; CurCPU->DataRead32(addr, &val);
-    MOV(32, rd, Imm32(val));
+    Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr);
 }
 
 void Compiler::T_Comp_MemSPRel()
 {
     u32 offset = (CurInstr.Instr & 0xFF) * 4;
-    OpArg rd = MapReg(CurInstr.T_Reg(8));
     bool load = CurInstr.Instr & (1 << 11);
 
-    LEA(32, ABI_PARAM1, MDisp(MapReg(13).GetSimpleReg(), offset));
-
-    Comp_MemAccess(rd, false, !load, 32);
+    Comp_MemAccess(CurInstr.T_Reg(8), 13, ComplexOperand(offset), 32,
+        load ? 0 : memop_Store);
 }
 
 void Compiler::T_Comp_PUSH_POP()
@ -36,7 +36,7 @@ enum {
|
|||||||
A_StaticShiftSetC = 1 << 18,
|
A_StaticShiftSetC = 1 << 18,
|
||||||
A_SetC = 1 << 19,
|
A_SetC = 1 << 19,
|
||||||
|
|
||||||
A_WriteMemory = 1 << 20,
|
A_WriteMem = 1 << 20
|
||||||
};
|
};
|
||||||
|
|
||||||
#define A_BIOP A_Read16
|
#define A_BIOP A_Read16
|
||||||
@@ -109,7 +109,7 @@ const u32 A_UMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(
 const u32 A_UMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_UMLAL);
 const u32 A_SMULL = A_MulFlags | A_Write16 | A_Write12 | A_Read0 | A_Read8 | ak(ak_SMULL);
 const u32 A_SMLAL = A_MulFlags | A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLAL);
-const u32 A_SMLAxy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLALxy);
+const u32 A_SMLAxy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAxy);
 const u32 A_SMLAWy = A_Write16 | A_Read0 | A_Read8 | A_Read12 | ak(ak_SMLAWy);
 const u32 A_SMULWy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULWy);
 const u32 A_SMLALxy = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLALxy);
@@ -123,7 +123,7 @@ const u32 A_QDADD = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDADD);
 const u32 A_QDSUB = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDSUB);

 #define A_LDR A_Write12
-#define A_STR A_Read12 | A_WriteMemory
+#define A_STR A_Read12 | A_WriteMem

 #define A_IMPLEMENT_WB_LDRSTR(x,k) \
 const u32 A_##x##_IMM = A_##k | A_Read16 | A_MemWriteback | ak(ak_##x##_IMM); \
@@ -144,7 +144,7 @@ A_IMPLEMENT_WB_LDRSTR(LDR,LDR)
 A_IMPLEMENT_WB_LDRSTR(LDRB,LDR)

 #define A_LDRD A_Write12Double
-#define A_STRD A_Read12Double | A_WriteMemory
+#define A_STRD A_Read12Double | A_WriteMem

 #define A_IMPLEMENT_HD_LDRSTR(x,k) \
 const u32 A_##x##_IMM = A_##k | A_Read16 | A_MemWriteback | ak(ak_##x##_IMM); \
@@ -159,11 +159,11 @@ A_IMPLEMENT_HD_LDRSTR(LDRH,LDR)
 A_IMPLEMENT_HD_LDRSTR(LDRSB,LDR)
 A_IMPLEMENT_HD_LDRSTR(LDRSH,LDR)

-const u32 A_SWP = A_Write12 | A_Read16 | A_Read0 | A_WriteMemory | ak(ak_SWP);
-const u32 A_SWPB = A_Write12 | A_Read16 | A_Read0 | A_WriteMemory | ak(ak_SWPB);
+const u32 A_SWP = A_Write12 | A_Read16 | A_Read0 | A_WriteMem | ak(ak_SWP);
+const u32 A_SWPB = A_Write12 | A_Read16 | A_Read0 | A_WriteMem | ak(ak_SWPB);

 const u32 A_LDM = A_Read16 | A_MemWriteback | ak(ak_LDM);
-const u32 A_STM = A_Read16 | A_MemWriteback | A_WriteMemory | ak(ak_STM);
+const u32 A_STM = A_Read16 | A_MemWriteback | A_WriteMem | ak(ak_STM);

 const u32 A_B = A_BranchAlways | ak(ak_B);
 const u32 A_BL = A_BranchAlways | A_Link | ak(ak_BL);
@@ -181,7 +181,7 @@ const u32 A_SVC = A_BranchAlways | A_Link | ak(ak_SVC);

 // THUMB

-#define tk(x) ((x) << 21)
+#define tk(x) ((x) << 22)

 enum {
     T_Read0 = 1 << 0,
@@ -210,6 +210,8 @@ enum {
     T_SetMaybeC = 1 << 18,
     T_ReadC = 1 << 19,
     T_SetC = 1 << 20,
+
+    T_WriteMem = 1 << 21,
 };

 const u32 T_LSL_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_LSL_IMM);
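The instruction-info word packs flag bits in the low bits and the instruction kind above them via tk(); adding T_WriteMem at bit 21 is why tk() moves from << 21 to << 22, and why Decode() below now extracts the kind with (data >> 22) & 0x3F. A small self-contained illustration of that packing (the kind value and flag set are examples, not the real table):

    #include <cstdint>
    #include <cassert>

    // Flag bits occupy the low bits; the kind sits above the highest flag.
    enum
    {
        T_Read0    = 1 << 0,
        T_WriteMem = 1 << 21,   // new flag -> kind field shifts up by one bit
    };
    #define tk(x) ((uint32_t)(x) << 22)

    int main()
    {
        const uint32_t tk_STR_REG = 25;               // example kind id
        uint32_t data = T_Read0 | T_WriteMem | tk(tk_STR_REG);

        assert(((data >> 22) & 0x3F) == tk_STR_REG);  // kind decodes cleanly
        assert(data & T_WriteMem);                    // flag still visible
    }

If another flag were ever added, the shift in tk() and the shift in Decode() would again have to move in lockstep, which is exactly the pairing visible in this commit.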
@@ -253,30 +255,30 @@ const u32 T_ADD_SP = T_WriteR13 | T_ReadR13 | tk(tk_ADD_SP);

 const u32 T_LDR_PCREL = T_Write8 | tk(tk_LDR_PCREL);

-const u32 T_STR_REG = T_Read0 | T_Read3 | T_Read6 | tk(tk_STR_REG);
-const u32 T_STRB_REG = T_Read0 | T_Read3 | T_Read6 | tk(tk_STRB_REG);
+const u32 T_STR_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STR_REG);
+const u32 T_STRB_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRB_REG);
 const u32 T_LDR_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDR_REG);
 const u32 T_LDRB_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRB_REG);
-const u32 T_STRH_REG = T_Read0 | T_Read3 | T_Read6 | tk(tk_STRH_REG);
+const u32 T_STRH_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRH_REG);
 const u32 T_LDRSB_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRSB_REG);
 const u32 T_LDRH_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRH_REG);
 const u32 T_LDRSH_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRSH_REG);

-const u32 T_STR_IMM = T_Read0 | T_Read3 | tk(tk_STR_IMM);
+const u32 T_STR_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STR_IMM);
 const u32 T_LDR_IMM = T_Write0 | T_Read3 | tk(tk_LDR_IMM);
-const u32 T_STRB_IMM = T_Read0 | T_Read3 | tk(tk_STRB_IMM);
+const u32 T_STRB_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRB_IMM);
 const u32 T_LDRB_IMM = T_Write0 | T_Read3 | tk(tk_LDRB_IMM);
-const u32 T_STRH_IMM = T_Read0 | T_Read3 | tk(tk_STRH_IMM);
+const u32 T_STRH_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRH_IMM);
 const u32 T_LDRH_IMM = T_Write0 | T_Read3 | tk(tk_LDRH_IMM);

-const u32 T_STR_SPREL = T_Read8 | T_ReadR13 | tk(tk_STR_SPREL);
+const u32 T_STR_SPREL = T_Read8 | T_ReadR13 | T_WriteMem | tk(tk_STR_SPREL);
 const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | tk(tk_LDR_SPREL);

-const u32 T_PUSH = T_ReadR13 | T_WriteR13 | tk(tk_PUSH);
+const u32 T_PUSH = T_ReadR13 | T_WriteR13 | T_WriteMem | tk(tk_PUSH);
 const u32 T_POP = T_PopPC | T_ReadR13 | T_WriteR13 | tk(tk_POP);

 const u32 T_LDMIA = T_Read8 | T_Write8 | tk(tk_LDMIA);
-const u32 T_STMIA = T_Read8 | T_Write8 | tk(tk_STMIA);
+const u32 T_STMIA = T_Read8 | T_Write8 | T_WriteMem | tk(tk_STMIA);

 const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND);
 const u32 T_BX = T_BranchAlways | T_ReadHi3 | tk(tk_BX);
@@ -307,7 +309,7 @@ Info Decode(bool thumb, u32 num, u32 instr)
     if (thumb)
     {
         u32 data = THUMBInstrTable[(instr >> 6) & 0x3FF];
-        res.Kind = (data >> 21) & 0x3F;
+        res.Kind = (data >> 22) & 0x3F;

         if (data & T_Read0)
             res.SrcRegs |= 1 << (instr & 0x7);
@@ -356,6 +358,9 @@ Info Decode(bool thumb, u32 num, u32 instr)
         if (data & T_SetC)
             res.WriteFlags |= flag_C;

+        if (data & T_WriteMem)
+            res.SpecialKind = special_WriteMem;
+
         res.EndBlock |= res.Branches();

         if (res.Kind == tk_BCOND)
@@ -382,6 +387,9 @@ Info Decode(bool thumb, u32 num, u32 instr)
             u32 id = (cn<<8)|(cm<<4)|cpinfo;
             if (id == 0x704 || id == 0x782 || id == 0x750 || id == 0x751 || id == 0x752)
                 res.EndBlock |= true;
+
+            if (id == 0x704 || id == 0x782)
+                res.SpecialKind = special_WaitForInterrupt;
         }
         if (res.Kind == ak_MCR || res.Kind == ak_MRC)
         {
@@ -449,6 +457,9 @@ Info Decode(bool thumb, u32 num, u32 instr)
         if ((data & A_SetC) || (data & A_StaticShiftSetC) && ((instr >> 7) & 0x1F))
             res.WriteFlags |= flag_C;

+        if (data & A_WriteMem)
+            res.SpecialKind = special_WriteMem;
+
         if ((instr >> 28) < 0xE)
         {
             // make non conditional flag sets conditional
@@ -226,18 +226,27 @@ enum
     flag_V = 1 << 0,
 };

+enum
+{
+    special_NotSpecialAtAll = 0,
+    special_WriteMem,
+    special_WaitForInterrupt
+};
+
 struct Info
 {
     u16 DstRegs, SrcRegs;
     u16 Kind;

+    u8 SpecialKind;
+
     u8 ReadFlags;
     // lower 4 bits - set always
     // upper 4 bits - might set flag
     u8 WriteFlags;

     bool EndBlock;
-    bool Branches()
+    bool Branches() const
     {
         return DstRegs & (1 << 15);
     }
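The new SpecialKind field lets later passes reason about a decoded instruction without re-decoding it, which is what the idle loop recognition from the commit message needs: a candidate idle loop should branch back on itself and contain nothing flagged special_WriteMem, while special_WaitForInterrupt marks the explicit halt case. A rough sketch of that kind of check, written against assumed types rather than melonDS's real ones:

    #include <cstdint>
    #include <vector>

    // Assumed mirror of the decoder output; only the fields the check needs.
    enum { special_NotSpecialAtAll = 0, special_WriteMem, special_WaitForInterrupt };
    struct DecodedInstr { uint8_t SpecialKind; bool BranchesBack; };

    // A block is an idle-loop candidate if it loops on itself and never stores:
    // re-running it cannot change memory, so the emulator may skip ahead in time.
    bool IsIdleLoopCandidate(const std::vector<DecodedInstr>& block)
    {
        if (block.empty() || !block.back().BranchesBack)
            return false;
        for (const DecodedInstr& instr : block)
            if (instr.SpecialKind == special_WriteMem)
                return false;
        return true;
    }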
src/CP15.cpp (12 lines changed)
@@ -562,9 +562,11 @@ void ARMv5::CP15Write(u32 id, u32 val)


     case 0x750:
+        ARMJIT::InvalidateAll();
         ICacheInvalidateAll();
         return;
     case 0x751:
+        ARMJIT::InvalidateByAddr(ARMJIT::TranslateAddr<0>(val));
         ICacheInvalidateByAddr(val);
         return;
     case 0x752:
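These CP15 operations are the guest's instruction-cache invalidation hooks, so they are also natural points to drop any JIT blocks whose source code may just have been overwritten: one register invalidates everything, the other a single address. A toy version of those two invalidation shapes, using a made-up block map rather than melonDS's actual cache structures:

    #include <cstdint>
    #include <unordered_map>

    // Hypothetical compiled-block registry keyed by guest start address.
    struct BlockCache
    {
        std::unordered_map<uint32_t, void*> blocks;

        void InvalidateAll() { blocks.clear(); }

        // Drop any block that starts inside the cache line containing addr.
        void InvalidateByAddr(uint32_t addr, uint32_t lineSize = 32)
        {
            uint32_t lineStart = addr & ~(lineSize - 1);
            for (uint32_t a = lineStart; a < lineStart + lineSize; a += 2)
                blocks.erase(a);
        }
    };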
@@ -814,7 +816,7 @@ void ARMv5::DataWrite8(u32 addr, u8 val)
         DataCycles = 1;
         *(u8*)&ITCM[addr & 0x7FFF] = val;
 #ifdef JIT_ENABLED
-        ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL;
+        ARMJIT::InvalidateITCM(addr & 0x7FFF);
 #endif
         return;
     }
@@ -838,7 +840,7 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
         DataCycles = 1;
         *(u16*)&ITCM[addr & 0x7FFF] = val;
 #ifdef JIT_ENABLED
-        ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL;
+        ARMJIT::InvalidateITCM(addr & 0x7FFF);
 #endif
         return;
     }
@@ -862,8 +864,7 @@ void ARMv5::DataWrite32(u32 addr, u32 val)
         DataCycles = 1;
         *(u32*)&ITCM[addr & 0x7FFF] = val;
 #ifdef JIT_ENABLED
-        ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL;
-        ARMJIT::cache.ARM9_ITCM[((addr + 2) & 0x7FFF) >> 1] = NULL;
+        ARMJIT::InvalidateITCM(addr & 0x7FFF);
 #endif
         return;
     }
@@ -887,8 +888,7 @@ void ARMv5::DataWrite32S(u32 addr, u32 val)
         DataCycles += 1;
         *(u32*)&ITCM[addr & 0x7FFF] = val;
 #ifdef JIT_ENABLED
-        ARMJIT::cache.ARM9_ITCM[(addr & 0x7FFF) >> 1] = NULL;
-        ARMJIT::cache.ARM9_ITCM[((addr & 0x7FFF) >> 1) + 1] = NULL;
+        ARMJIT::InvalidateITCM(addr & 0x7FFF);
 #endif
         return;
     }
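Every ITCM store now funnels through a single ARMJIT::InvalidateITCM(offset) call instead of nulling individual half-word slots in a lookup table, which fits the "more reliable code invalidation detection" from the commit message. A simplified sketch of the general idea behind such a call, with invented data structures purely for illustration:

    #include <cstdint>
    #include <bitset>

    // Invented stand-ins: 32 KB of ITCM tracked per 512-byte region.
    constexpr uint32_t kITCMSize = 0x8000;
    constexpr uint32_t kRegionShift = 9;
    std::bitset<(kITCMSize >> kRegionShift)> itcmRegionDirty;

    // Coarse write-driven invalidation: mark the region so any compiled block
    // overlapping it is discarded before it next runs.
    void InvalidateITCMRegion(uint32_t offset)
    {
        itcmRegionDirty.set((offset & (kITCMSize - 1)) >> kRegionShift);
    }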
@@ -40,6 +40,7 @@ char DSiNANDPath[1024];
 #ifdef JIT_ENABLED
 bool JIT_Enable = false;
 int JIT_MaxBlockSize = 12;
+bool JIT_BrancheOptimisations = true;
 #endif

 ConfigEntry ConfigFile[] =
@@ -56,6 +57,7 @@ ConfigEntry ConfigFile[] =
 #ifdef JIT_ENABLED
     {"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
     {"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 10, NULL, 0},
+    {"JIT_BrancheOptimisations", 0, &JIT_BrancheOptimisations, 1, NULL, 0},
 #endif

     {"", -1, NULL, 0, NULL, 0}
@@ -54,6 +54,7 @@ extern char DSiNANDPath[1024];
 #ifdef JIT_ENABLED
 extern bool JIT_Enable;
 extern int JIT_MaxBlockSize;
+extern bool JIT_BrancheOptimisations;
 #endif

 }
src/NDS.cpp (22 lines changed)
@@ -575,7 +575,7 @@ void Reset()
     RCnt = 0;

 #ifdef JIT_ENABLED
-    ARMJIT::InvalidateBlockCache();
+    ARMJIT::ResetBlockCache();
 #endif

     NDSCart::Reset();
@@ -807,7 +807,7 @@ bool DoSavestate(Savestate* file)
 #ifdef JIT_ENABLED
     if (!file->Saving)
     {
-        ARMJIT::InvalidateBlockCache();
+        ARMJIT::ResetBlockCache();
     }
 #endif

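Resetting the block cache on console reset and on savestate load is necessary because compiled blocks bake in assumptions about the code that was in guest memory when they were generated; after a load, that code may be entirely different. A minimal sketch of the idea, again with a hypothetical lookup structure rather than the real one:

    #include <cstdint>
    #include <cstring>

    // Hypothetical fast-lookup table from guest address to compiled entry point.
    using JitEntry = uint32_t (*)();
    static JitEntry lookupTable[1 << 16];

    // On reset or savestate load, forget every compiled block: the next time an
    // address is executed, the dispatcher finds a null entry and recompiles.
    void ResetBlockCache()
    {
        std::memset(lookupTable, 0, sizeof(lookupTable));
    }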
@@ -2016,10 +2016,6 @@ u32 ARM9Read32(u32 addr)

 void ARM9Write8(u32 addr, u8 val)
 {
-#ifdef JIT_ENABLED
-    ARMJIT::Invalidate16<0>(addr);
-#endif
-
     switch (addr & 0xFF000000)
     {
     case 0x02000000:
@@ -2070,10 +2066,6 @@ void ARM9Write8(u32 addr, u8 val)

 void ARM9Write16(u32 addr, u16 val)
 {
-#ifdef JIT_ENABLED
-    ARMJIT::Invalidate16<0>(addr);
-#endif
-
     switch (addr & 0xFF000000)
     {
     case 0x02000000:
@@ -2140,10 +2132,6 @@ void ARM9Write16(u32 addr, u16 val)

 void ARM9Write32(u32 addr, u32 val)
 {
-#ifdef JIT_ENABLED
-    ARMJIT::Invalidate32<0>(addr);
-#endif
-
     switch (addr & 0xFF000000)
     {
     case 0x02000000:
@@ -2439,7 +2427,7 @@ u32 ARM7Read32(u32 addr)
 void ARM7Write8(u32 addr, u8 val)
 {
 #ifdef JIT_ENABLED
-    ARMJIT::Invalidate16<1>(addr);
+    ARMJIT::InvalidateByAddr7(addr);
 #endif

     switch (addr & 0xFF800000)
@@ -2502,7 +2490,7 @@ void ARM7Write8(u32 addr, u8 val)
 void ARM7Write16(u32 addr, u16 val)
 {
 #ifdef JIT_ENABLED
-    ARMJIT::Invalidate16<1>(addr);
+    ARMJIT::InvalidateByAddr7(addr);
 #endif

     switch (addr & 0xFF800000)
@@ -2575,7 +2563,7 @@ void ARM7Write16(u32 addr, u16 val)
 void ARM7Write32(u32 addr, u32 val)
 {
 #ifdef JIT_ENABLED
-    ARMJIT::Invalidate32<1>(addr);
+    ARMJIT::InvalidateByAddr7(addr);
 #endif

     switch (addr & 0xFF800000)