mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2024-11-14 21:37:42 -07:00
make literal optimisation more reliable
fixes the Spanish version of Pokemon HeartGold
This commit is contained in:
parent
d1d96d2236
commit
3e7483636f
@ -161,6 +161,27 @@ void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
|
||||
}
|
||||
}
|
||||
|
||||
bool DecodeLiteral(const FetchedInstr& instr, u32& addr)
|
||||
{
|
||||
switch (instr.Info.Kind)
|
||||
{
|
||||
case ARMInstrInfo::ak_STR_IMM:
|
||||
case ARMInstrInfo::ak_STRB_IMM:
|
||||
addr = (instr.Addr + 8) + ((instr.Instr & 0xFFF) * (instr.Instr & (1 << 23) ? 1 : -1));
|
||||
return true;
|
||||
case ARMInstrInfo::ak_STRD_IMM:
|
||||
case ARMInstrInfo::ak_STRH_IMM:
|
||||
addr = (instr.Addr + 8) + (((instr.Instr & 0xF00) >> 4 | (instr.Instr & 0xF)) * (instr.Instr & (1 << 23) ? 1 : -1));
|
||||
return true;
|
||||
case ARMInstrInfo::ak_STM: // I honestly hope noone was ever crazy enough to do stm pc, {whatever}
|
||||
addr = instr.Addr + 8;
|
||||
return true;
|
||||
default:
|
||||
JIT_DEBUGPRINT("Literal %08x %x not recognised\n", instr.Instr, instr.Addr);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link,
|
||||
u32& linkAddr, u32& targetAddr)
|
||||
{
|
||||
@ -463,6 +484,23 @@ void CompileBlock(ARM* cpu)
|
||||
instrs[i].DataCycles = cpu->DataCycles;
|
||||
instrs[i].DataRegion = cpu->DataRegion;
|
||||
|
||||
if (instrs[i].Info.SpecialKind == ARMInstrInfo::special_WriteMem
|
||||
&& instrs[i].Info.SrcRegs == (1 << 15)
|
||||
&& instrs[i].Info.DstRegs == 0)
|
||||
{
|
||||
assert (!thumb);
|
||||
|
||||
u32 addr;
|
||||
if (DecodeLiteral(instrs[i], addr))
|
||||
{
|
||||
JIT_DEBUGPRINT("pc relative write detected\n");
|
||||
u32 translatedAddr = cpu->Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
|
||||
|
||||
ARMJIT::InvalidateByAddr(translatedAddr, false);
|
||||
CodeRanges[translatedAddr / 512].InvalidLiterals |= (1 << ((translatedAddr & 0x1FF) / 16));
|
||||
}
|
||||
}
|
||||
|
||||
if (thumb && instrs[i].Info.Kind == ARMInstrInfo::tk_BL_LONG_2 && i > 0
|
||||
&& instrs[i - 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_1)
|
||||
{
|
||||
@ -631,7 +669,7 @@ void CompileBlock(ARM* cpu)
|
||||
JitBlocks.Add(block);
|
||||
}
|
||||
|
||||
void InvalidateByAddr(u32 pseudoPhysical)
|
||||
void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore)
|
||||
{
|
||||
JIT_DEBUGPRINT("invalidating by addr %x\n", pseudoPhysical);
|
||||
AddressRange* range = &CodeRanges[pseudoPhysical / 512];
|
||||
@ -657,11 +695,14 @@ void InvalidateByAddr(u32 pseudoPhysical)
|
||||
|
||||
FastBlockAccess[block->PseudoPhysicalAddr / 2] = NULL;
|
||||
|
||||
u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
|
||||
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
|
||||
delete RestoreCandidates[slot];
|
||||
if (mayRestore)
|
||||
{
|
||||
u32 slot = HashRestoreCandidate(block->PseudoPhysicalAddr);
|
||||
if (RestoreCandidates[slot] && RestoreCandidates[slot] != block)
|
||||
delete RestoreCandidates[slot];
|
||||
|
||||
RestoreCandidates[slot] = block;
|
||||
RestoreCandidates[slot] = block;
|
||||
}
|
||||
}
|
||||
if ((range->TimesInvalidated + 1) > range->TimesInvalidated)
|
||||
range->TimesInvalidated++;
|
||||
@ -732,6 +773,7 @@ void ResetBlockCache()
|
||||
u32 addr = block->AddressRanges()[j];
|
||||
CodeRanges[addr / 512].Blocks.Clear();
|
||||
CodeRanges[addr / 512].TimesInvalidated = 0;
|
||||
CodeRanges[addr / 512].InvalidLiterals = 0;
|
||||
}
|
||||
delete block;
|
||||
}
|
||||
|
@ -61,7 +61,7 @@ inline JitBlockEntry LookUpBlock(u32 addr)
|
||||
void Init();
|
||||
void DeInit();
|
||||
|
||||
void InvalidateByAddr(u32 pseudoPhysical);
|
||||
void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore = true);
|
||||
void InvalidateAll();
|
||||
|
||||
void InvalidateITCM(u32 addr);
|
||||
|
@ -63,7 +63,7 @@ struct __attribute__((packed)) TinyVector
|
||||
{
|
||||
T* Data = NULL;
|
||||
u16 Capacity = 0;
|
||||
u32 Length = 0; // make it 32 bit so we don't need movzx
|
||||
u16 Length = 0;
|
||||
|
||||
~TinyVector()
|
||||
{
|
||||
@ -181,6 +181,7 @@ private:
|
||||
// Bookkeeping for one 512 byte range of code memory; CodeRanges is indexed
// with (pseudo physical address / 512).
// The generated memory routines index CodeRanges with (addr >> 9) << 4 and
// read Blocks.Length through offsetof(), so the field order and the overall
// size (appears to be 16 bytes — confirm before adding or widening fields)
// must not change.
struct __attribute__((packed)) AddressRange
|
||||
{
|
||||
// JIT blocks associated with this range. The generated memory routines test
// Blocks.Length against 0 to decide whether an invalidation is needed at all.
TinyVector<JitBlock*> Blocks;
|
||||
// Bitmask of 16 byte chunks in this range that were targeted by a detected
// pc relative write; the literal load optimisation is skipped for a chunk
// whose bit is set. Cleared by ResetBlockCache.
// NOTE(review): the bit index is (addr & 0x1FF) / 16, i.e. 0..31, but this
// field only holds 16 bits — chunks in the upper 256 bytes look like they
// can never be marked. Confirm.
u16 InvalidLiterals;
|
||||
// Incremented by InvalidateByAddr, reset to 0 by ResetBlockCache.
u16 TimesInvalidated;
|
||||
};
|
||||
|
||||
|
@ -108,7 +108,7 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
SHR(32, R(RSCRATCH), Imm8(9));
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||
CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||
FixupBranch noCode = J_CC(CC_Z);
|
||||
JMP((u8*)InvalidateByAddr, true);
|
||||
SetJumpTarget(noCode);
|
||||
@ -206,7 +206,7 @@ void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
|
||||
MOV(32, R(ABI_PARAM4), R(RSCRATCH));
|
||||
SHR(32, R(RSCRATCH), Imm8(9));
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
CMP(32, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||
CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||
FixupBranch noCode = J_CC(CC_Z);
|
||||
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
|
||||
@ -278,10 +278,10 @@ void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
|
||||
Comp_AddCycles_CDI();
|
||||
}
|
||||
|
||||
void fault(u32 a, u32 b)
|
||||
/*void fault(u32 a, u32 b, u32 c, u32 d)
|
||||
{
|
||||
printf("actually not static! %x %x\n", a, b);
|
||||
}
|
||||
printf("actually not static! %x %x %x %x\n", a, b, c, d);
|
||||
}*/
|
||||
|
||||
void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
|
||||
{
|
||||
@ -291,11 +291,17 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
||||
if (size == 16)
|
||||
addressMask = ~1;
|
||||
|
||||
//bool check = false;
|
||||
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
|
||||
{
|
||||
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||
Comp_MemLoadLiteral(size, rd, addr);
|
||||
return;
|
||||
u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
|
||||
|
||||
if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
|
||||
{
|
||||
Comp_MemLoadLiteral(size, rd, addr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
@ -438,6 +444,20 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
||||
|
||||
CALL(memoryFunc);
|
||||
|
||||
/*if (Num == 0 && check)
|
||||
{
|
||||
CMP(32, R(EAX), rdMapped);
|
||||
FixupBranch notEqual = J_CC(CC_E);
|
||||
ABI_PushRegistersAndAdjustStack({RSCRATCH}, 0);
|
||||
MOV(32, R(ABI_PARAM1), Imm32(R15 - (Thumb ? 4 : 8)));
|
||||
MOV(32, R(ABI_PARAM2), R(EAX));
|
||||
MOV(32, R(ABI_PARAM3), rdMapped);
|
||||
MOV(32, R(ABI_PARAM4), Imm32(CurInstr.Instr));
|
||||
CALL((u8*)fault);
|
||||
ABI_PopRegistersAndAdjustStack({RSCRATCH}, 0);
|
||||
SetJumpTarget(notEqual);
|
||||
}*/
|
||||
|
||||
if (!(flags & memop_Store))
|
||||
{
|
||||
if (inlinePreparation && size == 32)
|
||||
|
Loading…
Reference in New Issue
Block a user