mirror of
https://github.com/melonDS-emu/melonDS.git
synced 2024-11-14 13:27:41 -07:00
move ARM64 JIT backend here
This commit is contained in:
parent
baed0ac0d5
commit
99b34efe2d
@ -36,7 +36,7 @@ detect_architecture("__i386__" x86)
|
||||
detect_architecture("__arm__" ARM)
|
||||
detect_architecture("__aarch64__" ARM64)
|
||||
|
||||
if (ARCHITECTURE STREQUAL x86_64)
|
||||
if (ARCHITECTURE STREQUAL x86_64 OR ARCHITECTURE STREQUAL ARM64)
|
||||
option(ENABLE_JIT "Enable x64 JIT recompiler" ON)
|
||||
endif()
|
||||
|
||||
|
@ -254,10 +254,14 @@ public:
|
||||
|
||||
u32 DTCMSetting, ITCMSetting;
|
||||
|
||||
u8 ITCM[0x8000];
|
||||
// for aarch64 JIT they need to go up here
|
||||
// to be addressable by a 12-bit immediate
|
||||
u32 ITCMSize;
|
||||
u8 DTCM[0x4000];
|
||||
u32 DTCMBase, DTCMSize;
|
||||
s32 RegionCodeCycles;
|
||||
|
||||
u8 ITCM[0x8000];
|
||||
u8 DTCM[0x4000];
|
||||
|
||||
u8 ICache[0x2000];
|
||||
u32 ICacheTags[64*4];
|
||||
@ -282,7 +286,6 @@ public:
|
||||
// code/16N/32N/32S
|
||||
u8 MemTimings[0x100000][4];
|
||||
|
||||
s32 RegionCodeCycles;
|
||||
u8* CurICacheLine;
|
||||
|
||||
bool (*GetMemRegion)(u32 addr, bool write, NDS::MemRegion* region);
|
||||
|
@ -6,7 +6,11 @@
|
||||
#include "Config.h"
|
||||
|
||||
#include "ARMJIT_Internal.h"
|
||||
#if defined(__x86_64__)
|
||||
#include "ARMJIT_x64/ARMJIT_Compiler.h"
|
||||
#else
|
||||
#include "ARMJIT_A64/ARMJIT_Compiler.h"
|
||||
#endif
|
||||
|
||||
#include "ARMInterpreter_ALU.h"
|
||||
#include "ARMInterpreter_LoadStore.h"
|
||||
|
837
src/ARMJIT_A64/ARMJIT_ALU.cpp
Normal file
837
src/ARMJIT_A64/ARMJIT_ALU.cpp
Normal file
@ -0,0 +1,837 @@
|
||||
#include "ARMJIT_Compiler.h"
|
||||
|
||||
using namespace Arm64Gen;
|
||||
|
||||
namespace ARMJIT
|
||||
{
|
||||
|
||||
void Compiler::Comp_RegShiftReg(int op, bool S, Op2& op2, ARM64Reg rs)
|
||||
{
|
||||
if (!(CurInstr.SetFlags & 0x2))
|
||||
S = false;
|
||||
|
||||
CPSRDirty |= S;
|
||||
|
||||
UBFX(W1, rs, 0, 8);
|
||||
|
||||
if (!S)
|
||||
{
|
||||
if (op == 3)
|
||||
RORV(W0, op2.Reg.Rm, W1);
|
||||
else
|
||||
{
|
||||
CMP(W1, 32);
|
||||
if (op == 2)
|
||||
{
|
||||
MOVI2R(W2, 31);
|
||||
CSEL(W1, W2, W1, CC_GE);
|
||||
ASRV(W0, op2.Reg.Rm, W1);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op == 0)
|
||||
LSLV(W0, op2.Reg.Rm, W1);
|
||||
else if (op == 1)
|
||||
LSRV(W0, op2.Reg.Rm, W1);
|
||||
CSEL(W0, WZR, W0, CC_GE);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(W0, op2.Reg.Rm);
|
||||
FixupBranch zero = CBZ(W1);
|
||||
|
||||
SUB(W1, W1, 1);
|
||||
if (op == 3)
|
||||
{
|
||||
RORV(W0, op2.Reg.Rm, W1);
|
||||
BFI(RCPSR, W0, 29, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
CMP(W1, 31);
|
||||
if (op == 2)
|
||||
{
|
||||
MOVI2R(W2, 31);
|
||||
CSEL(W1, W2, W1, CC_GT);
|
||||
ASRV(W0, op2.Reg.Rm, W1);
|
||||
BFI(RCPSR, W0, 29, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op == 0)
|
||||
{
|
||||
LSLV(W0, op2.Reg.Rm, W1);
|
||||
UBFX(W1, W0, 31, 1);
|
||||
}
|
||||
else if (op == 1)
|
||||
LSRV(W0, op2.Reg.Rm, W1);
|
||||
CSEL(W1, WZR, op ? W0 : W1, CC_GT);
|
||||
BFI(RCPSR, W1, 29, 1);
|
||||
CSEL(W0, WZR, W0, CC_GE);
|
||||
}
|
||||
}
|
||||
|
||||
MOV(W0, W0, ArithOption(W0, (ShiftType)op, 1));
|
||||
SetJumpTarget(zero);
|
||||
}
|
||||
op2 = Op2(W0, ST_LSL, 0);
|
||||
}
|
||||
|
||||
void Compiler::Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, ARM64Reg tmp)
|
||||
{
|
||||
if (!(CurInstr.SetFlags & 0x2))
|
||||
S = false;
|
||||
|
||||
CPSRDirty |= S;
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case 0: // LSL
|
||||
if (S && amount)
|
||||
{
|
||||
UBFX(tmp, op2.Reg.Rm, 32 - amount, 1);
|
||||
BFI(RCPSR, tmp, 29, 1);
|
||||
}
|
||||
op2 = Op2(op2.Reg.Rm, ST_LSL, amount);
|
||||
return;
|
||||
case 1: // LSR
|
||||
if (S)
|
||||
{
|
||||
UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1);
|
||||
BFI(RCPSR, tmp, 29, 1);
|
||||
}
|
||||
if (amount == 0)
|
||||
{
|
||||
op2 = Op2(0);
|
||||
return;
|
||||
}
|
||||
op2 = Op2(op2.Reg.Rm, ST_LSR, amount);
|
||||
return;
|
||||
case 2: // ASR
|
||||
if (S)
|
||||
{
|
||||
UBFX(tmp, op2.Reg.Rm, (amount ? amount : 32) - 1, 1);
|
||||
BFI(RCPSR, tmp, 29, 1);
|
||||
}
|
||||
op2 = Op2(op2.Reg.Rm, ST_ASR, amount ? amount : 31);
|
||||
return;
|
||||
case 3: // ROR
|
||||
if (amount == 0)
|
||||
{
|
||||
UBFX(tmp, RCPSR, 29, 1);
|
||||
LSL(tmp, tmp, 31);
|
||||
if (S)
|
||||
BFI(RCPSR, op2.Reg.Rm, 29, 1);
|
||||
ORR(tmp, tmp, op2.Reg.Rm, ArithOption(tmp, ST_LSR, 1));
|
||||
|
||||
op2 = Op2(tmp, ST_LSL, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (S)
|
||||
{
|
||||
UBFX(tmp, op2.Reg.Rm, amount - 1, 1);
|
||||
BFI(RCPSR, tmp, 29, 1);
|
||||
}
|
||||
op2 = Op2(op2.Reg.Rm, ST_ROR, amount);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::Comp_RetriveFlags(bool retriveCV)
|
||||
{
|
||||
if (CurInstr.SetFlags)
|
||||
CPSRDirty = true;
|
||||
|
||||
if (CurInstr.SetFlags & 0x4)
|
||||
{
|
||||
CSET(W0, CC_EQ);
|
||||
BFI(RCPSR, W0, 30, 1);
|
||||
}
|
||||
if (CurInstr.SetFlags & 0x8)
|
||||
{
|
||||
CSET(W0, CC_MI);
|
||||
BFI(RCPSR, W0, 31, 1);
|
||||
}
|
||||
if (retriveCV)
|
||||
{
|
||||
if (CurInstr.SetFlags & 0x2)
|
||||
{
|
||||
CSET(W0, CC_CS);
|
||||
BFI(RCPSR, W0, 29, 1);
|
||||
}
|
||||
if (CurInstr.SetFlags & 0x1)
|
||||
{
|
||||
CSET(W0, CC_VS);
|
||||
BFI(RCPSR, W0, 28, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::Comp_Logical(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
|
||||
{
|
||||
if (S && !CurInstr.SetFlags)
|
||||
S = false;
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case 0x0: // AND
|
||||
if (S)
|
||||
{
|
||||
if (op2.IsImm)
|
||||
ANDSI2R(rd, rn, op2.Imm, W0);
|
||||
else
|
||||
ANDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op2.IsImm)
|
||||
ANDI2R(rd, rn, op2.Imm, W0);
|
||||
else
|
||||
AND(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
break;
|
||||
case 0x1: // EOR
|
||||
if (op2.IsImm)
|
||||
EORI2R(rd, rn, op2.Imm, W0);
|
||||
else
|
||||
EOR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
if (S && FlagsNZNeeded())
|
||||
TST(rd, rd);
|
||||
break;
|
||||
case 0xC: // ORR
|
||||
if (op2.IsImm)
|
||||
ORRI2R(rd, rn, op2.Imm, W0);
|
||||
else
|
||||
ORR(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
if (S && FlagsNZNeeded())
|
||||
TST(rd, rd);
|
||||
break;
|
||||
case 0xE: // BIC
|
||||
if (S)
|
||||
{
|
||||
if (op2.IsImm)
|
||||
ANDSI2R(rd, rn, ~op2.Imm, W0);
|
||||
else
|
||||
BICS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op2.IsImm)
|
||||
ANDI2R(rd, rn, ~op2.Imm, W0);
|
||||
else
|
||||
BIC(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (S)
|
||||
Comp_RetriveFlags(false);
|
||||
}
|
||||
|
||||
void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2)
|
||||
{
|
||||
if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
|
||||
{
|
||||
MOV(W0, op2.Reg.Rm, op2.ToArithOption());
|
||||
op2 = Op2(W0, ST_LSL, 0);
|
||||
}
|
||||
|
||||
if (S && !CurInstr.SetFlags)
|
||||
S = false;
|
||||
|
||||
bool CVInGP = false;
|
||||
switch (op)
|
||||
{
|
||||
case 0x2: // SUB
|
||||
if (S)
|
||||
{
|
||||
if (op2.IsImm)
|
||||
SUBSI2R(rd, rn, op2.Imm, W0);
|
||||
else
|
||||
SUBS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op2.IsImm)
|
||||
{
|
||||
MOVI2R(W2, op2.Imm);
|
||||
SUBI2R(rd, rn, op2.Imm, W0);
|
||||
}
|
||||
else
|
||||
SUB(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
break;
|
||||
case 0x3: // RSB
|
||||
if (op2.IsZero())
|
||||
{
|
||||
op2 = Op2(WZR);
|
||||
}
|
||||
else if (op2.IsImm)
|
||||
{
|
||||
MOVI2R(W1, op2.Imm);
|
||||
op2 = Op2(W1);
|
||||
}
|
||||
else if (op2.Reg.ShiftAmount != 0)
|
||||
{
|
||||
MOV(W1, op2.Reg.Rm, op2.ToArithOption());
|
||||
op2 = Op2(W1);
|
||||
}
|
||||
|
||||
if (S)
|
||||
SUBS(rd, op2.Reg.Rm, rn);
|
||||
else
|
||||
SUB(rd, op2.Reg.Rm, rn);
|
||||
break;
|
||||
case 0x4: // ADD
|
||||
if (S)
|
||||
{
|
||||
if (op2.IsImm)
|
||||
ADDSI2R(rd, rn, op2.Imm, W0);
|
||||
else
|
||||
ADDS(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (op2.IsImm)
|
||||
ADDI2R(rd, rn, op2.Imm, W0);
|
||||
else
|
||||
ADD(rd, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
break;
|
||||
case 0x5: // ADC
|
||||
UBFX(W2, RCPSR, 29, 1);
|
||||
if (S)
|
||||
{
|
||||
CVInGP = true;
|
||||
ADDS(W1, rn, W2);
|
||||
CSET(W2, CC_CS);
|
||||
CSET(W3, CC_VS);
|
||||
if (op2.IsImm)
|
||||
ADDSI2R(rd, W1, op2.Imm, W0);
|
||||
else
|
||||
ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
|
||||
CSINC(W2, W2, WZR, CC_CC);
|
||||
CSINC(W3, W3, WZR, CC_VC);
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD(W1, rn, W2);
|
||||
if (op2.IsImm)
|
||||
ADDI2R(rd, W1, op2.Imm, W0);
|
||||
else
|
||||
ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
break;
|
||||
case 0x6: // SBC
|
||||
UBFX(W2, RCPSR, 29, 1);
|
||||
// W1 = -op2 - 1
|
||||
if (op2.IsImm)
|
||||
MOVI2R(W1, ~op2.Imm);
|
||||
else
|
||||
ORN(W1, WZR, op2.Reg.Rm, op2.ToArithOption());
|
||||
if (S)
|
||||
{
|
||||
CVInGP = true;
|
||||
ADDS(W1, W2, W1);
|
||||
CSET(W2, CC_CS);
|
||||
CSET(W3, CC_VS);
|
||||
ADDS(rd, rn, W1);
|
||||
CSINC(W2, W2, WZR, CC_CC);
|
||||
CSINC(W3, W3, WZR, CC_VC);
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD(W1, W2, W1);
|
||||
ADD(rd, rn, W1);
|
||||
}
|
||||
break;
|
||||
case 0x7: // RSC
|
||||
UBFX(W2, RCPSR, 29, 1);
|
||||
// W1 = -rn - 1
|
||||
MVN(W1, rn);
|
||||
if (S)
|
||||
{
|
||||
CVInGP = true;
|
||||
ADDS(W1, W2, W1);
|
||||
CSET(W2, CC_CS);
|
||||
CSET(W3, CC_VS);
|
||||
if (op2.IsImm)
|
||||
ADDSI2R(rd, W1, op2.Imm);
|
||||
else
|
||||
ADDS(rd, W1, op2.Reg.Rm, op2.ToArithOption());
|
||||
CSINC(W2, W2, WZR, CC_CC);
|
||||
CSINC(W3, W3, WZR, CC_VC);
|
||||
}
|
||||
else
|
||||
{
|
||||
ADD(W1, W2, W1);
|
||||
if (op2.IsImm)
|
||||
ADDI2R(rd, W1, op2.Imm);
|
||||
else
|
||||
ADD(rd, W1, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (S)
|
||||
{
|
||||
if (CVInGP)
|
||||
{
|
||||
BFI(RCPSR, W2, 29, 1);
|
||||
BFI(RCPSR, W3, 28, 1);
|
||||
}
|
||||
Comp_RetriveFlags(!CVInGP);
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::Comp_Compare(int op, ARM64Reg rn, Op2 op2)
|
||||
{
|
||||
if (!op2.IsImm && op2.Reg.ShiftType == ST_ROR)
|
||||
{
|
||||
MOV(W0, op2.Reg.Rm, op2.ToArithOption());
|
||||
op2 = Op2(W0, ST_LSL, 0);
|
||||
}
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case 0x8: // TST
|
||||
if (op2.IsImm)
|
||||
TSTI2R(rn, op2.Imm, W0);
|
||||
else
|
||||
ANDS(WZR, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
break;
|
||||
case 0x9: // TEQ
|
||||
if (op2.IsImm)
|
||||
EORI2R(W0, rn, op2.Imm, W0);
|
||||
else
|
||||
EOR(W0, rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
TST(W0, W0);
|
||||
break;
|
||||
case 0xA: // CMP
|
||||
if (op2.IsImm)
|
||||
CMPI2R(rn, op2.Imm, W0);
|
||||
else
|
||||
CMP(rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
break;
|
||||
case 0xB: // CMN
|
||||
if (op2.IsImm)
|
||||
ADDSI2R(WZR, rn, op2.Imm, W0);
|
||||
else
|
||||
CMN(rn, op2.Reg.Rm, op2.ToArithOption());
|
||||
break;
|
||||
}
|
||||
|
||||
Comp_RetriveFlags(op >= 0xA);
|
||||
}
|
||||
|
||||
// also counts cycles!
|
||||
void Compiler::A_Comp_GetOp2(bool S, Op2& op2)
|
||||
{
|
||||
if (CurInstr.Instr & (1 << 25))
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
op2 = Op2(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E));
|
||||
}
|
||||
else
|
||||
{
|
||||
int op = (CurInstr.Instr >> 5) & 0x3;
|
||||
op2.Reg.Rm = MapReg(CurInstr.A_Reg(0));
|
||||
if (CurInstr.Instr & (1 << 4))
|
||||
{
|
||||
Comp_AddCycles_CI(1);
|
||||
|
||||
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
|
||||
if (CurInstr.A_Reg(0) == 15)
|
||||
{
|
||||
ADD(W0, op2.Reg.Rm, 4);
|
||||
op2.Reg.Rm = W0;
|
||||
}
|
||||
Comp_RegShiftReg(op, S, op2, rs);
|
||||
}
|
||||
else
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
|
||||
int amount = (CurInstr.Instr >> 7) & 0x1F;
|
||||
Comp_RegShiftImm(op, amount, S, op2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_ALUCmpOp()
|
||||
{
|
||||
u32 op = (CurInstr.Instr >> 21) & 0xF;
|
||||
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
|
||||
Op2 op2;
|
||||
A_Comp_GetOp2(op <= 0x9, op2);
|
||||
|
||||
Comp_Compare(op, rn, op2);
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_ALUMovOp()
|
||||
{
|
||||
bool S = CurInstr.Instr & (1 << 20);
|
||||
u32 op = (CurInstr.Instr >> 21) & 0xF;
|
||||
|
||||
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
|
||||
Op2 op2;
|
||||
A_Comp_GetOp2(S, op2);
|
||||
|
||||
if (op == 0xF) // MVN
|
||||
{
|
||||
if (op2.IsImm)
|
||||
{
|
||||
if (CurInstr.Cond() == 0xE)
|
||||
RegCache.PutLiteral(CurInstr.A_Reg(12), ~op2.Imm);
|
||||
MOVI2R(rd, ~op2.Imm);
|
||||
}
|
||||
else
|
||||
ORN(rd, WZR, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
else // MOV
|
||||
{
|
||||
if (op2.IsImm)
|
||||
{
|
||||
if (CurInstr.Cond() == 0xE)
|
||||
RegCache.PutLiteral(CurInstr.A_Reg(12), op2.Imm);
|
||||
MOVI2R(rd, op2.Imm);
|
||||
}
|
||||
else
|
||||
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
|
||||
}
|
||||
|
||||
if (S)
|
||||
{
|
||||
if (FlagsNZNeeded())
|
||||
TST(rd, rd);
|
||||
Comp_RetriveFlags(false);
|
||||
}
|
||||
|
||||
if (CurInstr.Info.Branches())
|
||||
Comp_JumpTo(rd, true, S);
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_ALUTriOp()
|
||||
{
|
||||
bool S = CurInstr.Instr & (1 << 20);
|
||||
u32 op = (CurInstr.Instr >> 21) & 0xF;
|
||||
bool logical = (1 << op) & 0xF303;
|
||||
|
||||
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
|
||||
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
|
||||
Op2 op2;
|
||||
A_Comp_GetOp2(S && logical, op2);
|
||||
|
||||
if (op2.IsImm && op2.Imm == 0)
|
||||
op2 = Op2(WZR, ST_LSL, 0);
|
||||
|
||||
if (logical)
|
||||
Comp_Logical(op, S, rd, rn, op2);
|
||||
else
|
||||
Comp_Arithmetic(op, S, rd, rn, op2);
|
||||
|
||||
if (CurInstr.Info.Branches())
|
||||
Comp_JumpTo(rd, true, S);
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_Clz()
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
|
||||
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
|
||||
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
|
||||
|
||||
CLZ(rd, rm);
|
||||
|
||||
assert(Num == 0);
|
||||
}
|
||||
|
||||
void Compiler::Comp_Mul_Mla(bool S, bool mla, ARM64Reg rd, ARM64Reg rm, ARM64Reg rs, ARM64Reg rn)
|
||||
{
|
||||
if (Num == 0)
|
||||
{
|
||||
Comp_AddCycles_CI(S ? 3 : 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
CLZ(W0, rs);
|
||||
CLS(W1, rs);
|
||||
CMP(W0, W1);
|
||||
CSEL(W0, W0, W1, CC_GT);
|
||||
Comp_AddCycles_CI(mla ? 1 : 0, W0, ArithOption(W0, ST_LSR, 3));
|
||||
}
|
||||
|
||||
if (mla)
|
||||
MADD(rd, rm, rs, rn);
|
||||
else
|
||||
MUL(rd, rm, rs);
|
||||
|
||||
if (S && FlagsNZNeeded())
|
||||
{
|
||||
TST(rd, rd);
|
||||
Comp_RetriveFlags(false);
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_Mul_Long()
|
||||
{
|
||||
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
|
||||
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
|
||||
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
|
||||
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
|
||||
|
||||
bool S = CurInstr.Instr & (1 << 20);
|
||||
bool add = CurInstr.Instr & (1 << 21);
|
||||
bool sign = CurInstr.Instr & (1 << 22);
|
||||
|
||||
if (Num == 0)
|
||||
{
|
||||
Comp_AddCycles_CI(S ? 3 : 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
CLZ(W0, rs);
|
||||
CLS(W1, rs);
|
||||
CMP(W0, W1);
|
||||
CSEL(W0, W0, W1, CC_GT);
|
||||
Comp_AddCycles_CI(0, W0, ArithOption(W0, ST_LSR, 3));
|
||||
}
|
||||
|
||||
if (add)
|
||||
{
|
||||
MOV(W0, rn);
|
||||
BFI(X0, EncodeRegTo64(rd), 32, 32);
|
||||
if (sign)
|
||||
SMADDL(EncodeRegTo64(rn), rm, rs, X0);
|
||||
else
|
||||
UMADDL(EncodeRegTo64(rn), rm, rs, X0);
|
||||
if (S && FlagsNZNeeded())
|
||||
TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
|
||||
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (sign)
|
||||
SMULL(EncodeRegTo64(rn), rm, rs);
|
||||
else
|
||||
UMULL(EncodeRegTo64(rn), rm, rs);
|
||||
if (S && FlagsNZNeeded())
|
||||
TST(EncodeRegTo64(rn), EncodeRegTo64(rn));
|
||||
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
|
||||
}
|
||||
|
||||
if (S)
|
||||
Comp_RetriveFlags(false);
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_Mul()
|
||||
{
|
||||
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
|
||||
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
|
||||
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
|
||||
|
||||
bool S = CurInstr.Instr & (1 << 20);
|
||||
bool mla = CurInstr.Instr & (1 << 21);
|
||||
ARM64Reg rn = INVALID_REG;
|
||||
if (mla)
|
||||
rn = MapReg(CurInstr.A_Reg(12));
|
||||
|
||||
Comp_Mul_Mla(S, mla, rd, rm, rs, rn);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_ShiftImm()
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
|
||||
u32 op = (CurInstr.Instr >> 11) & 0x3;
|
||||
int amount = (CurInstr.Instr >> 6) & 0x1F;
|
||||
|
||||
ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
|
||||
Op2 op2;
|
||||
op2.Reg.Rm = MapReg(CurInstr.T_Reg(3));
|
||||
Comp_RegShiftImm(op, amount, true, op2);
|
||||
if (op2.IsImm)
|
||||
MOVI2R(rd, op2.Imm);
|
||||
else
|
||||
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
|
||||
if (FlagsNZNeeded())
|
||||
TST(rd, rd);
|
||||
|
||||
Comp_RetriveFlags(false);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_AddSub_()
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
|
||||
Op2 op2;
|
||||
if (CurInstr.Instr & (1 << 10))
|
||||
op2 = Op2((CurInstr.Instr >> 6) & 0x7);
|
||||
else
|
||||
op2 = Op2(MapReg(CurInstr.T_Reg(6)));
|
||||
|
||||
Comp_Arithmetic(
|
||||
CurInstr.Instr & (1 << 9) ? 0x2 : 0x4,
|
||||
true,
|
||||
MapReg(CurInstr.T_Reg(0)),
|
||||
MapReg(CurInstr.T_Reg(3)),
|
||||
op2);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_ALUImm8()
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
|
||||
u32 imm = CurInstr.Instr & 0xFF;
|
||||
int op = (CurInstr.Instr >> 11) & 0x3;
|
||||
|
||||
ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case 0:
|
||||
MOVI2R(rd, imm);
|
||||
if (FlagsNZNeeded())
|
||||
TST(rd, rd);
|
||||
Comp_RetriveFlags(false);
|
||||
break;
|
||||
case 1:
|
||||
Comp_Compare(0xA, rd, Op2(imm));
|
||||
break;
|
||||
case 2:
|
||||
case 3:
|
||||
Comp_Arithmetic(op == 2 ? 0x4 : 0x2, true, rd, rd, Op2(imm));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_ALU()
|
||||
{
|
||||
int op = (CurInstr.Instr >> 6) & 0xF;
|
||||
ARM64Reg rd = MapReg(CurInstr.T_Reg(0));
|
||||
ARM64Reg rs = MapReg(CurInstr.T_Reg(3));
|
||||
|
||||
if ((op >= 0x2 && op <= 0x4) || op == 0x7)
|
||||
Comp_AddCycles_CI(1);
|
||||
else
|
||||
Comp_AddCycles_C();
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case 0x0:
|
||||
Comp_Logical(0x0, true, rd, rd, Op2(rs));
|
||||
break;
|
||||
case 0x1:
|
||||
Comp_Logical(0x1, true, rd, rd, Op2(rs));
|
||||
break;
|
||||
case 0x2:
|
||||
case 0x3:
|
||||
case 0x4:
|
||||
case 0x7:
|
||||
{
|
||||
Op2 op2;
|
||||
op2.Reg.Rm = rd;
|
||||
Comp_RegShiftReg(op == 0x7 ? 3 : (op - 0x2), true, op2, rs);
|
||||
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
|
||||
if (FlagsNZNeeded())
|
||||
TST(rd, rd);
|
||||
Comp_RetriveFlags(false);
|
||||
}
|
||||
break;
|
||||
case 0x5:
|
||||
Comp_Arithmetic(0x5, true, rd, rd, Op2(rs));
|
||||
break;
|
||||
case 0x6:
|
||||
Comp_Arithmetic(0x6, true, rd, rd, Op2(rs));
|
||||
break;
|
||||
case 0x8:
|
||||
Comp_Compare(0x8, rd, Op2(rs));
|
||||
break;
|
||||
case 0x9:
|
||||
Comp_Arithmetic(0x3, true, rd, rs, Op2(0));
|
||||
break;
|
||||
case 0xA:
|
||||
Comp_Compare(0xA, rd, Op2(rs));
|
||||
break;
|
||||
case 0xB:
|
||||
Comp_Compare(0xB, rd, Op2(rs));
|
||||
break;
|
||||
case 0xC:
|
||||
Comp_Logical(0xC, true, rd, rd, Op2(rs));
|
||||
break;
|
||||
case 0xD:
|
||||
Comp_Mul_Mla(true, false, rd, rd, rs, INVALID_REG);
|
||||
break;
|
||||
case 0xE:
|
||||
Comp_Logical(0xE, true, rd, rd, Op2(rs));
|
||||
break;
|
||||
case 0xF:
|
||||
MVN(rd, rs);
|
||||
if (FlagsNZNeeded())
|
||||
TST(rd, rd);
|
||||
Comp_RetriveFlags(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_ALU_HiReg()
|
||||
{
|
||||
u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8));
|
||||
ARM64Reg rdMapped = MapReg(rd);
|
||||
ARM64Reg rs = MapReg((CurInstr.Instr >> 3) & 0xF);
|
||||
|
||||
u32 op = (CurInstr.Instr >> 8) & 0x3;
|
||||
|
||||
Comp_AddCycles_C();
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case 0:
|
||||
Comp_Arithmetic(0x4, false, rdMapped, rdMapped, Op2(rs));
|
||||
break;
|
||||
case 1:
|
||||
Comp_Compare(0xA, rdMapped, rs);
|
||||
return;
|
||||
case 2:
|
||||
MOV(rdMapped, rs);
|
||||
break;
|
||||
}
|
||||
|
||||
if (rd == 15)
|
||||
{
|
||||
Comp_JumpTo(rdMapped, false, false);
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_AddSP()
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
|
||||
ARM64Reg sp = MapReg(13);
|
||||
u32 offset = (CurInstr.Instr & 0x7F) << 2;
|
||||
if (CurInstr.Instr & (1 << 7))
|
||||
SUB(sp, sp, offset);
|
||||
else
|
||||
ADD(sp, sp, offset);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_RelAddr()
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
|
||||
ARM64Reg rd = MapReg(CurInstr.T_Reg(8));
|
||||
u32 offset = (CurInstr.Instr & 0xFF) << 2;
|
||||
if (CurInstr.Instr & (1 << 11))
|
||||
{
|
||||
ARM64Reg sp = MapReg(13);
|
||||
ADD(rd, sp, offset);
|
||||
}
|
||||
else
|
||||
MOVI2R(rd, (R15 & ~2) + offset);
|
||||
}
|
||||
|
||||
}
|
452
src/ARMJIT_A64/ARMJIT_Branch.cpp
Normal file
452
src/ARMJIT_A64/ARMJIT_Branch.cpp
Normal file
@ -0,0 +1,452 @@
|
||||
#include "ARMJIT_Compiler.h"
|
||||
|
||||
using namespace Arm64Gen;
|
||||
|
||||
// hack
|
||||
const int kCodeCacheTiming = 3;
|
||||
|
||||
namespace ARMJIT
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
|
||||
{
|
||||
cpu->JumpTo(addr, changeCPSR);
|
||||
}
|
||||
|
||||
// Emits a jump to an address known at compile time. Bit 0 of addr selects
// Thumb mode for the target.
//
// Constant branches can be simplified by a lot, because the code fetch
// timings can already be determined while compiling. For this the CPU state
// is temporarily altered at compile time and put back afterwards.
// It isn't completely safe to make assumptions on things like which
// instructions are preloaded, we'll see how it works out.
//
// The cycles are added to ConstantCycles if the instruction is always
// executed and forceNonConstantCycles isn't set, otherwise code which adds
// them to RCycles is emitted.
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
{
    IrregularCycles = true;

    u32 newPC;
    u32 cycles = 0;

    // adjust the T bit (0x20) of CPSR if the mode changes
    const bool targetThumb = addr & 0x1;
    const bool currentlyThumb = Thumb;
    if (targetThumb != currentlyThumb)
    {
        CPSRDirty = true;
        if (targetThumb)
            ORRI2R(RCPSR, RCPSR, 0x20);
        else
            ANDI2R(RCPSR, RCPSR, ~0x20);
    }

    if (Num == 0)
    {
        ARMv5* cpu9 = static_cast<ARMv5*>(CurCPU);

        const u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
        const u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
        cpu9->RegionCodeCycles = regionCodeCycles;

        // at runtime RegionCodeCycles has to be updated as well
        MOVI2R(W0, regionCodeCycles);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, RegionCodeCycles));

        // the code memory only needs to be set up again, if the
        // topmost byte of the address differs
        const bool setupRegion = (addr >> 24) != (R15 >> 24);
        if (setupRegion)
            cpu9->SetupCodeMem(addr);

        // the code reads are only performed for their timing
        if (addr & 0x1)
        {
            addr &= ~0x1;
            newPC = addr+2;

            // two-opcodes-at-once fetch
            if (addr & 0x2)
            {
                cpu9->CodeRead32(addr-2, true);
                cycles += cpu9->CodeCycles;
                cpu9->CodeRead32(addr+2, false);
                cycles += CurCPU->CodeCycles;
            }
            else
            {
                cpu9->CodeRead32(addr, true);
                cycles += cpu9->CodeCycles;
            }
        }
        else
        {
            addr &= ~0x3;
            newPC = addr+4;

            cpu9->CodeRead32(addr, true);
            cycles += cpu9->CodeCycles;
            cpu9->CodeRead32(addr+4, false);
            cycles += cpu9->CodeCycles;
        }

        // back to the compile time state
        cpu9->RegionCodeCycles = compileTimeCodeCycles;
        if (setupRegion)
            cpu9->SetupCodeMem(R15);
    }
    else
    {
        ARMv4* cpu7 = static_cast<ARMv4*>(CurCPU);

        const u32 codeRegion = addr >> 24;
        const u32 codeCycles = addr >> 15; // cheato

        cpu7->CodeRegion = codeRegion;
        cpu7->CodeCycles = codeCycles;

        // at runtime both have to be updated as well
        MOVI2R(W0, codeRegion);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeRegion));
        MOVI2R(W0, codeCycles);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));

        // Thumb targets use the first two entries of the timing table,
        // ARM targets the last two
        const bool thumbTarget = addr & 0x1;
        const int timing = thumbTarget ? 0 : 2;
        addr &= thumbTarget ? ~0x1u : ~0x3u;
        newPC = addr + (thumbTarget ? 2 : 4);

        // this is necessary because ARM7 bios protection
        const u32 compileTimePC = CurCPU->R[15];
        CurCPU->R[15] = newPC;

        cycles += NDS::ARM7MemTimings[codeCycles][timing] + NDS::ARM7MemTimings[codeCycles][timing + 1];

        CurCPU->R[15] = compileTimePC;

        cpu7->CodeRegion = R15 >> 24;
        // NOTE(review): CodeRegion is put back based on R15, while this is
        // based on the jump target. Possibly R15 >> 15 was intended - confirm.
        cpu7->CodeCycles = addr >> 15;
    }

    if (Exit)
    {
        MOVI2R(W0, newPC);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
    }

    const bool alwaysExecuted = Thumb || CurInstr.Cond() >= 0xE;
    if (alwaysExecuted && !forceNonConstantCycles)
        ConstantCycles += cycles;
    else
        ADD(RCycles, RCycles, cycles);
}
|
||||
|
||||
|
||||
void* Compiler::Gen_JumpTo9(int kind)
|
||||
{
|
||||
AlignCode16();
|
||||
void* res = GetRXPtr();
|
||||
|
||||
MOVI2R(W2, kCodeCacheTiming);
|
||||
// W1 - code cycles non branch
|
||||
// W2 - branch code cycles
|
||||
LSR(W1, W0, 12);
|
||||
LSL(W1, W1, 2);
|
||||
ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2);
|
||||
LDRB(W1, RCPU, W1);
|
||||
|
||||
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
|
||||
|
||||
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles));
|
||||
|
||||
CMP(W0, W3);
|
||||
FixupBranch outsideITCM = B(CC_LO);
|
||||
MOVI2R(W1, 1);
|
||||
MOVI2R(W2, 1);
|
||||
SetJumpTarget(outsideITCM);
|
||||
|
||||
FixupBranch switchToThumb;
|
||||
if (kind == 0)
|
||||
switchToThumb = TBNZ(W0, 0);
|
||||
|
||||
if (kind == 0 || kind == 1)
|
||||
{
|
||||
ANDI2R(W0, W0, ~3);
|
||||
|
||||
if (kind == 0)
|
||||
ANDI2R(RCPSR, RCPSR, ~0x20);
|
||||
|
||||
ADD(W3, W0, 4);
|
||||
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
|
||||
|
||||
ADD(W1, W1, W2);
|
||||
ADD(RCycles, RCycles, W1);
|
||||
|
||||
RET();
|
||||
}
|
||||
if (kind == 0 || kind == 2)
|
||||
{
|
||||
if (kind == 0)
|
||||
{
|
||||
SetJumpTarget(switchToThumb);
|
||||
|
||||
ORRI2R(RCPSR, RCPSR, 0x20);
|
||||
}
|
||||
|
||||
ANDI2R(W0, W0, ~1);
|
||||
|
||||
ADD(W3, W0, 2);
|
||||
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
|
||||
|
||||
FixupBranch halfwordLoc = TBZ(W0, 1);
|
||||
ADD(W1, W1, W2);
|
||||
ADD(RCycles, RCycles, W1);
|
||||
RET();
|
||||
|
||||
SetJumpTarget(halfwordLoc);
|
||||
ADD(RCycles, RCycles, W2);
|
||||
RET();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void* Compiler::Gen_JumpTo7(int kind)
|
||||
{
|
||||
void* res = GetRXPtr();
|
||||
|
||||
LSR(W1, W0, 24);
|
||||
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeRegion));
|
||||
LSR(W1, W0, 15);
|
||||
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeCycles));
|
||||
|
||||
MOVP2R(X2, NDS::ARM7MemTimings);
|
||||
LDR(W3, X2, ArithOption(W1, true));
|
||||
|
||||
FixupBranch switchToThumb;
|
||||
if (kind == 0)
|
||||
switchToThumb = TBNZ(W0, 0);
|
||||
|
||||
if (kind == 0 || kind == 1)
|
||||
{
|
||||
UBFX(W2, W3, 0, 8);
|
||||
UBFX(W3, W3, 8, 8);
|
||||
ADD(W2, W3, W2);
|
||||
ADD(RCycles, RCycles, W2);
|
||||
|
||||
ANDI2R(W0, W0, ~3);
|
||||
|
||||
if (kind == 0)
|
||||
ANDI2R(RCPSR, RCPSR, ~0x20);
|
||||
|
||||
ADD(W3, W0, 4);
|
||||
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
|
||||
|
||||
RET();
|
||||
}
|
||||
if (kind == 0 || kind == 2)
|
||||
{
|
||||
if (kind == 0)
|
||||
{
|
||||
SetJumpTarget(switchToThumb);
|
||||
|
||||
ORRI2R(RCPSR, RCPSR, 0x20);
|
||||
}
|
||||
|
||||
UBFX(W2, W3, 16, 8);
|
||||
UBFX(W3, W3, 24, 8);
|
||||
ADD(W2, W3, W2);
|
||||
ADD(RCycles, RCycles, W2);
|
||||
|
||||
ANDI2R(W0, W0, ~1);
|
||||
|
||||
ADD(W3, W0, 2);
|
||||
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
|
||||
|
||||
RET();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// Emits a jump to an address held in a register.
// switchThumb: the mode is taken from bit 0 of the address, otherwise the
// current mode is kept.
// restoreCPSR: the jump is carried out by calling the JumpTo method of the
// cpu with this flag set, instead of one of the generated jump routines.
void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR)
{
    IrregularCycles = true;

    if (!restoreCPSR)
    {
        // fast path via the routines made by Gen_JumpTo7/Gen_JumpTo9
        if (switchThumb)
            CPSRDirty = true;
        MOV(W0, addr);
        BL((Num ? JumpToFuncs7 : JumpToFuncs9)[switchThumb ? 0 : (Thumb + 1)]);
        return;
    }

    BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFF00);
    const bool previouslyDirty = CPSRDirty;
    SaveCPSR();

    // for conditional ARM instructions the registers have to stay loaded
    const bool conditional = !Thumb && CurInstr.Cond() < 0xE;

    if (conditional)
    {
        // the ugly way...
        // we only save them, to load and save them again
        for (int reg : hiRegsLoaded)
            SaveReg(reg, RegCache.Mapping[reg]);
    }
    else
    {
        RegCache.Flush();
    }

    // W1 = target, bit 0 is forced to the current mode without switchThumb
    if (switchThumb)
        MOV(W1, addr);
    else if (Thumb)
        ORRI2R(W1, addr, 1);
    else
        ANDI2R(W1, addr, ~1);

    MOV(X0, RCPU);
    MOVI2R(W2, restoreCPSR);
    if (Num == 0)
        QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
    else
        QuickCallFunction(X3, jumpToTrampoline<ARMv4>);

    if (conditional)
    {
        for (int reg : hiRegsLoaded)
            LoadReg(reg, RegCache.Mapping[reg]);
    }

    if (previouslyDirty)
        LoadCPSR();
    CPSRDirty = previouslyDirty;
}
|
||||
|
||||
void Compiler::A_Comp_BranchImm()
|
||||
{
|
||||
int op = (CurInstr.Instr >> 24) & 1;
|
||||
s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
|
||||
u32 target = R15 + offset;
|
||||
bool link = op;
|
||||
|
||||
if (CurInstr.Cond() == 0xF) // BLX_imm
|
||||
{
|
||||
target += (op << 1) + 1;
|
||||
link = true;
|
||||
}
|
||||
|
||||
if (link)
|
||||
MOVI2R(MapReg(14), R15 - 4);
|
||||
|
||||
Comp_JumpTo(target);
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_BranchXchangeReg()
|
||||
{
|
||||
ARM64Reg rn = MapReg(CurInstr.A_Reg(0));
|
||||
MOV(W0, rn);
|
||||
if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
|
||||
MOVI2R(MapReg(14), R15 - 4);
|
||||
Comp_JumpTo(W0, true);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_BCOND()
|
||||
{
|
||||
u32 cond = (CurInstr.Instr >> 8) & 0xF;
|
||||
FixupBranch skipExecute = CheckCondition(cond);
|
||||
|
||||
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
|
||||
Comp_JumpTo(R15 + offset + 1, true);
|
||||
|
||||
Comp_BranchSpecialBehaviour();
|
||||
|
||||
FixupBranch skipFailed = B();
|
||||
SetJumpTarget(skipExecute);
|
||||
Comp_AddCycles_C(true);
|
||||
|
||||
if (CurInstr.BranchFlags & branch_FollowCondTaken)
|
||||
{
|
||||
SaveCPSR(false);
|
||||
RegCache.PrepareExit();
|
||||
|
||||
ADD(W0, RCycles, ConstantCycles);
|
||||
ABI_PopRegisters(SavedRegs);
|
||||
RET();
|
||||
}
|
||||
|
||||
SetJumpTarget(skipFailed);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_B()
|
||||
{
|
||||
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
|
||||
Comp_JumpTo(R15 + offset + 1);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_BranchXchangeReg()
|
||||
{
|
||||
bool link = CurInstr.Instr & (1 << 7);
|
||||
|
||||
if (link)
|
||||
{
|
||||
if (Num == 1)
|
||||
{
|
||||
printf("BLX unsupported on ARM7!!!\n");
|
||||
return;
|
||||
}
|
||||
MOV(W0, MapReg(CurInstr.A_Reg(3)));
|
||||
MOVI2R(MapReg(14), R15 - 1);
|
||||
Comp_JumpTo(W0, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
ARM64Reg rn = MapReg(CurInstr.A_Reg(3));
|
||||
Comp_JumpTo(rn, true);
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_BL_LONG_1()
|
||||
{
|
||||
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
|
||||
MOVI2R(MapReg(14), R15 + offset);
|
||||
Comp_AddCycles_C();
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_BL_LONG_2()
|
||||
{
|
||||
ARM64Reg lr = MapReg(14);
|
||||
s32 offset = (CurInstr.Instr & 0x7FF) << 1;
|
||||
ADD(W0, lr, offset);
|
||||
MOVI2R(lr, (R15 - 2) | 1);
|
||||
Comp_JumpTo(W0, Num == 0 && !(CurInstr.Instr & (1 << 12)));
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_BL_Merged()
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
|
||||
R15 += 2;
|
||||
|
||||
u32 upperPart = CurInstr.Instr >> 16;
|
||||
u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9);
|
||||
target += (upperPart & 0x7FF) << 1;
|
||||
|
||||
if (Num == 1 || upperPart & (1 << 12))
|
||||
target |= 1;
|
||||
|
||||
MOVI2R(MapReg(14), (R15 - 2) | 1);
|
||||
|
||||
Comp_JumpTo(target);
|
||||
}
|
||||
|
||||
}
|
707
src/ARMJIT_A64/ARMJIT_Compiler.cpp
Normal file
707
src/ARMJIT_A64/ARMJIT_Compiler.cpp
Normal file
@ -0,0 +1,707 @@
|
||||
#include "ARMJIT_Compiler.h"
|
||||
|
||||
#include "../ARMInterpreter.h"
|
||||
|
||||
#include "../ARMJIT_Internal.h"
|
||||
|
||||
#ifdef __SWITCH__
|
||||
#include "../switch/compat_switch.h"
|
||||
|
||||
extern char __start__;
|
||||
#endif
|
||||
|
||||
#include <malloc.h>
|
||||
|
||||
using namespace Arm64Gen;
|
||||
|
||||
|
||||
namespace ARMJIT
|
||||
{
|
||||
|
||||
/*
|
||||
|
||||
Recompiling classic ARM to ARMv8 code is at the same time
|
||||
easier and trickier than compiling to a less related architecture
|
||||
like x64. On the one hand you can translate a lot of instructions directly.
|
||||
But at the same time, there are a ton of exceptions, like for
|
||||
example ADD and SUB can't have a RORed second operand on ARMv8.
|
||||
*/
|
||||
|
||||
template <>
|
||||
const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
|
||||
{W19, W20, W21, W22, W23, W24, W25, W26};
|
||||
template <>
|
||||
const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 8;
|
||||
|
||||
const int JitMemSize = 16 * 1024 * 1024;
|
||||
|
||||
void Compiler::MovePC()
|
||||
{
|
||||
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
|
||||
}
|
||||
|
||||
// Sets up the code buffer (Switch builds only) and emits the shared helper
// routines (memory access, jump and banked-register helpers) at its start.
Compiler::Compiler()
{
#ifdef __SWITCH__
    JitRWBase = memalign(0x1000, JitMemSize);

    // Look for an unmapped range for the executable view, starting just
    // below __start__ and moving down past each mapped region found.
    JitRXStart = (u8*)&__start__ - JitMemSize - 0x1000;
    JitRWStart = virtmemReserve(JitMemSize);
    MemoryInfo info = {0};
    u32 pageInfo = {0};
    int attempt = 0;
    while (JitRXStart != NULL)
    {
        svcQueryMemory(&info, &pageInfo, (u64)JitRXStart);
        if (info.type == MemType_Unmapped)
            break;

        JitRXStart = (void*)((u8*)info.addr - JitMemSize - 0x1000);

        if (attempt++ > 8)
        {
            printf("couldn't find unmapped place for jit memory\n");
            JitRXStart = NULL;
        }
    }

    assert(JitRXStart != NULL);

    bool ok = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
    assert(ok);
    ok = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize, Perm_Rx));
    assert(ok);
    ok = R_SUCCEEDED(svcMapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
    assert(ok);

    SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
    JitMemUseableSize = JitMemSize;
    Reset();
#endif

    // ARM9 memory routines are generated, indexed [size][store].
    for (int sizeIdx = 0; sizeIdx < 3; sizeIdx++)
    {
        for (int store = 0; store < 2; store++)
            MemFunc9[sizeIdx][store] = Gen_MemoryRoutine9(8 << sizeIdx, store);
    }

    // ARM7 accesses call the NDS bus functions directly.
    MemFunc7[0][0] = (void*)NDS::ARM7Read8;
    MemFunc7[1][0] = (void*)NDS::ARM7Read16;
    MemFunc7[2][0] = (void*)NDS::ARM7Read32;
    MemFunc7[0][1] = (void*)NDS::ARM7Write8;
    MemFunc7[1][1] = (void*)NDS::ARM7Write16;
    MemFunc7[2][1] = (void*)NDS::ARM7Write32;

    for (int store = 0; store < 2; store++)
    {
        for (int preinc = 0; preinc < 2; preinc++)
        {
            MemFuncsSeq9[store][preinc] = Gen_MemoryRoutine9Seq(store, preinc);
            MemFuncsSeq7[store][preinc] = Gen_MemoryRoutine7Seq(store, preinc);
        }
    }

    for (int kind = 0; kind < 3; kind++)
    {
        JumpToFuncs9[kind] = Gen_JumpTo9(kind);
        JumpToFuncs7[kind] = Gen_JumpTo7(kind);
    }

    /*
        Banked register access routines.
        W0 - mode
        W1 - reg num
        W3 - in/out value of reg
        The store variant additionally sets W4 to 1 if a banked register
        was written and to 0 otherwise.
    */
    auto emitBankedAccess = [this](bool store) -> void*
    {
        void* entry = GetRXPtr();

        // Mode 0x11 is dispatched first, with X2 = RCPU + regnum * 4.
        ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
        CMP(W0, 0x11);
        FixupBranch toFIQ = B(CC_EQ);

        // For the remaining modes the index is rebased by 13 - 8 first;
        // a negative result means the register is not banked.
        SUBS(W1, W1, 13 - 8);
        ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
        FixupBranch toNotBanked = B(CC_LT);
        CMP(W0, 0x12);
        FixupBranch toIRQ = B(CC_EQ);
        CMP(W0, 0x13);
        FixupBranch toSVC = B(CC_EQ);
        CMP(W0, 0x17);
        FixupBranch toABT = B(CC_EQ);
        CMP(W0, 0x1B);
        FixupBranch toUND = B(CC_EQ);

        // No matching mode falls through to the same exit.
        SetJumpTarget(toNotBanked);
        if (store)
            MOVI2R(W4, 0);
        RET();

        // One load/store + return per register bank.
        auto emitBank = [&](const FixupBranch& from, s32 bankOffset)
        {
            SetJumpTarget(from);
            if (store)
            {
                STR(INDEX_UNSIGNED, W3, X2, bankOffset);
                MOVI2R(W4, 1);
            }
            else
            {
                LDR(INDEX_UNSIGNED, W3, X2, bankOffset);
            }
            RET();
        };

        emitBank(toFIQ, offsetof(ARM, R_FIQ));
        emitBank(toIRQ, offsetof(ARM, R_IRQ));
        emitBank(toSVC, offsetof(ARM, R_SVC));
        emitBank(toABT, offsetof(ARM, R_ABT));
        emitBank(toUND, offsetof(ARM, R_UND));

        return entry;
    };

    ReadBanked = emitBankedAccess(false);
    WriteBanked = emitBankedAccess(true);

    //FlushIcache();

    // Everything emitted so far is permanent: move the code base past it.
    JitMemUseableSize -= GetCodeOffset();
    SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr());
}
|
||||
|
||||
// Releases the JIT memory mappings set up by the constructor (Switch only).
Compiler::~Compiler()
{
#ifdef __SWITCH__
    if (JitRWStart == NULL)
        return;

    // Drop the writable view first, then the executable mapping, and
    // finally free the backing allocation.
    bool ok = R_SUCCEEDED(svcUnmapProcessMemory(JitRWStart, envGetOwnProcessHandle(), (u64)JitRXStart, JitMemSize));
    assert(ok);
    virtmemFree(JitRWStart, JitMemSize);

    ok = R_SUCCEEDED(svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)JitRXStart, (u64)JitRWBase, JitMemSize));
    assert(ok);
    free(JitRWBase);
#endif
}
|
||||
|
||||
void Compiler::LoadReg(int reg, ARM64Reg nativeReg)
|
||||
{
|
||||
if (reg == 15)
|
||||
MOVI2R(nativeReg, R15);
|
||||
else
|
||||
LDR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg]));
|
||||
}
|
||||
|
||||
void Compiler::SaveReg(int reg, ARM64Reg nativeReg)
|
||||
{
|
||||
STR(INDEX_UNSIGNED, nativeReg, RCPU, offsetof(ARM, R[reg]));
|
||||
}
|
||||
|
||||
void Compiler::LoadCPSR()
|
||||
{
|
||||
assert(!CPSRDirty);
|
||||
LDR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
|
||||
}
|
||||
|
||||
void Compiler::SaveCPSR(bool markClean)
|
||||
{
|
||||
if (CPSRDirty)
|
||||
{
|
||||
STR(INDEX_UNSIGNED, RCPSR, RCPU, offsetof(ARM, CPSR));
|
||||
CPSRDirty = CPSRDirty && !markClean;
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a test of ARM condition `cond` against RCPSR and returns the branch
// that is taken when the condition FAILS (to be bound by the caller).
// Clobbers W1-W3 for cond >= 8.
FixupBranch Compiler::CheckCondition(u32 cond)
{
    if (cond < 0x8)
    {
        // Conditions 0-7 each test a single bit of bits 28-31 of RCPSR;
        // the bit index is derived from bits 1 and 2 of the condition.
        const u32 condBit1 = (cond >> 1) & 1;
        const u32 condBit2 = (cond >> 2) & 1;
        const u8 flagBit = 28 + (((condBit1 ^ 1) << 1) | (condBit2 ^ condBit1));

        // Odd conditions pass when the bit is clear, even ones when set.
        return (cond & 1) ? TBNZ(RCPSR, flagBit) : TBZ(RCPSR, flagBit);
    }

    // Remaining conditions: use the top four RCPSR bits as a bit index
    // into the precomputed ConditionTable mask.
    LSR(W1, RCPSR, 28);
    MOVI2R(W2, 1);
    LSLV(W2, W2, W1);
    ANDI2R(W2, W2, ARM::ConditionTable[cond], W3);

    return CBZ(W2);
}
|
||||
|
||||
#define F(x) &Compiler::A_Comp_##x
|
||||
const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
|
||||
{
|
||||
// AND
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// EOR
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// SUB
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// RSB
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// ADD
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// ADC
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// SBC
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// RSC
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// ORR
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// MOV
|
||||
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
|
||||
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
|
||||
// BIC
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp), F(ALUTriOp),
|
||||
// MVN
|
||||
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
|
||||
F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp), F(ALUMovOp),
|
||||
// TST
|
||||
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
|
||||
// TEQ
|
||||
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
|
||||
// CMP
|
||||
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
|
||||
// CMN
|
||||
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
|
||||
// Mul
|
||||
F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), NULL, NULL, NULL, NULL, NULL,
|
||||
// ARMv5 exclusives
|
||||
F(Clz), NULL, NULL, NULL, NULL,
|
||||
|
||||
// STR
|
||||
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
|
||||
// STRB
|
||||
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
|
||||
// LDR
|
||||
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
|
||||
// LDRB
|
||||
F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB), F(MemWB),
|
||||
// STRH
|
||||
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
|
||||
// LDRD
|
||||
NULL, NULL, NULL, NULL,
|
||||
// STRD
|
||||
NULL, NULL, NULL, NULL,
|
||||
// LDRH
|
||||
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
|
||||
// LDRSB
|
||||
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
|
||||
// LDRSH
|
||||
F(MemHD), F(MemHD), F(MemHD), F(MemHD),
|
||||
// Swap
|
||||
NULL, NULL,
|
||||
// LDM, STM
|
||||
F(LDM_STM), F(LDM_STM),
|
||||
// Branch
|
||||
F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg),
|
||||
// Special
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL
|
||||
};
|
||||
#undef F
|
||||
#define F(x) &Compiler::T_Comp_##x
|
||||
const Compiler::CompileFunc T_Comp[ARMInstrInfo::tk_Count] =
|
||||
{
|
||||
// Shift imm
|
||||
F(ShiftImm), F(ShiftImm), F(ShiftImm),
|
||||
// Add/sub tri operand
|
||||
F(AddSub_), F(AddSub_), F(AddSub_), F(AddSub_),
|
||||
// 8 bit imm
|
||||
F(ALUImm8), F(ALUImm8), F(ALUImm8), F(ALUImm8),
|
||||
// ALU
|
||||
F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
|
||||
F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU), F(ALU),
|
||||
// ALU hi reg
|
||||
F(ALU_HiReg), F(ALU_HiReg), F(ALU_HiReg),
|
||||
// PC/SP relative ops
|
||||
F(RelAddr), F(RelAddr), F(AddSP),
|
||||
// LDR PC rel
|
||||
F(LoadPCRel),
|
||||
// LDR/STR reg offset
|
||||
F(MemReg), F(MemReg), F(MemReg), F(MemReg),
|
||||
// LDR/STR sign extended, half
|
||||
F(MemRegHalf), F(MemRegHalf), F(MemRegHalf), F(MemRegHalf),
|
||||
// LDR/STR imm offset
|
||||
F(MemImm), F(MemImm), F(MemImm), F(MemImm),
|
||||
// LDR/STR half imm offset
|
||||
F(MemImmHalf), F(MemImmHalf),
|
||||
// LDR/STR sp rel
|
||||
F(MemSPRel), F(MemSPRel),
|
||||
// PUSH/POP
|
||||
F(PUSH_POP), F(PUSH_POP),
|
||||
// LDMIA, STMIA
|
||||
F(LDMIA_STMIA), F(LDMIA_STMIA),
|
||||
// Branch
|
||||
F(BCOND), F(BranchXchangeReg), F(BranchXchangeReg), F(B), F(BL_LONG_1), F(BL_LONG_2),
|
||||
// Unk, SVC
|
||||
NULL, NULL,
|
||||
F(BL_Merged)
|
||||
};
|
||||
|
||||
bool Compiler::CanCompile(bool thumb, u16 kind)
|
||||
{
|
||||
return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
|
||||
}
|
||||
|
||||
void Compiler::Comp_BranchSpecialBehaviour()
|
||||
{
|
||||
if (CurInstr.BranchFlags & branch_IdleBranch)
|
||||
{
|
||||
MOVI2R(W0, 1);
|
||||
STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop));
|
||||
}
|
||||
|
||||
if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
|
||||
{
|
||||
SaveCPSR(false);
|
||||
RegCache.PrepareExit();
|
||||
ADD(W0, RCycles, ConstantCycles);
|
||||
ABI_PopRegisters(SavedRegs);
|
||||
RET();
|
||||
}
|
||||
}
|
||||
|
||||
// Compiles `instrsCount` fetched instructions into one host code block and
// returns its entry point. Kinds without a compile handler are executed by
// calling the interpreter. The emitted block returns RCycles plus
// ConstantCycles in W0.
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
    // Start over when fewer than 16 KiB of code space remain.
    if (JitMemUseableSize - GetCodeOffset() < 1024 * 16)
    {
        printf("JIT memory full, resetting...\n");
        ResetBlockCache();
    }

    JitBlockEntry blockEntry = (JitBlockEntry)GetRXPtr();

    Thumb = thumb;
    Num = cpu->Num;
    CurCPU = cpu;
    ConstantCycles = 0;
    RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);

    //printf("compiling block at %x\n", R15 - (Thumb ? 2 : 4));

    // Registers 27-30 (mask 0x78000000) are always saved, plus whatever
    // the register cache uses, limited to the callee-saved mask.
    const u32 ALL_CALLEE_SAVED = 0x7FF80000;
    SavedRegs = BitSet32((RegCache.GetPushRegs() | BitSet32(0x78000000)) & BitSet32(ALL_CALLEE_SAVED));

    // Block prologue.
    ABI_PushRegisters(SavedRegs);
    MOVP2R(RCPU, CurCPU);
    MOVI2R(RCycles, 0);
    LoadCPSR();

    for (int i = 0; i < instrsCount; i++)
    {
        CurInstr = instrs[i];
        R15 = CurInstr.Addr + (Thumb ? 4 : 8);
        CodeRegion = R15 >> 24;

        const CompileFunc handler = Thumb
            ? T_Comp[CurInstr.Info.Kind]
            : A_Comp[CurInstr.Info.Kind];
        const bool interpreted = handler == NULL;
        const bool isLast = i == (instrsCount - 1);

        Exit = isLast || (CurInstr.BranchFlags & branch_FollowCondNotTaken);

        //printf("%x instr %x regs: r%x w%x n%x flags: %x %x %x\n", R15, CurInstr.Instr, CurInstr.Info.SrcRegs, CurInstr.Info.DstRegs, CurInstr.Info.ReadFlags, CurInstr.Info.NotStrictlyNeeded, CurInstr.Info.WriteFlags, CurInstr.SetFlags);

        const bool isConditional = Thumb
            ? CurInstr.Info.Kind == ARMInstrInfo::tk_BCOND
            : CurInstr.Cond() < 0xE;
        const bool storeState = interpreted
            || (CurInstr.BranchFlags & branch_FollowCondTaken)
            || (isLast && (!CurInstr.Info.Branches() || isConditional));

        if (storeState)
        {
            // Write r15 (and, where needed, the opcode and code cycles)
            // into the ARM state.
            MOVI2R(W0, R15);
            STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
            if (interpreted)
            {
                MOVI2R(W0, CurInstr.Instr);
                STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CurInstr));
            }
            if (Num == 0)
            {
                MOVI2R(W0, (s32)CurInstr.CodeCycles);
                STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));
            }
        }

        if (interpreted)
        {
            // The interpreter works on the ARM state in memory.
            SaveCPSR();
            RegCache.Flush();
        }
        else
        {
            RegCache.Prepare(Thumb, i);
        }

        if (Thumb)
        {
            if (interpreted)
            {
                MOV(X0, RCPU);
                QuickCallFunction(X1, InterpretTHUMB[CurInstr.Info.Kind]);
            }
            else
            {
                (this->*handler)();
            }
        }
        else
        {
            const u32 cond = CurInstr.Cond();

            if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
            {
                if (!interpreted)
                {
                    (this->*handler)();
                }
                else
                {
                    MOV(X0, RCPU);
                    QuickCallFunction(X1, ARMInterpreter::A_BLX_IMM);
                }
            }
            else if (cond == 0xF)
            {
                // Only the code cycles are accounted for.
                Comp_AddCycles_C();
            }
            else
            {
                const bool conditional = cond < 0xE;
                IrregularCycles = false;

                FixupBranch conditionFailed;
                if (conditional)
                    conditionFailed = CheckCondition(cond);

                if (interpreted)
                {
                    MOV(X0, RCPU);
                    QuickCallFunction(X1, InterpretARM[CurInstr.Info.Kind]);
                }
                else
                {
                    (this->*handler)();
                }

                Comp_BranchSpecialBehaviour();

                if (conditional)
                {
                    if (!IrregularCycles)
                    {
                        SetJumpTarget(conditionFailed);
                    }
                    else
                    {
                        // The skipped path still has to account for the
                        // code cycles.
                        FixupBranch executedDone = B();
                        SetJumpTarget(conditionFailed);

                        Comp_AddCycles_C();

                        if (CurInstr.BranchFlags & branch_FollowCondTaken)
                        {
                            SaveCPSR(false);
                            RegCache.PrepareExit();
                            ADD(W0, RCycles, ConstantCycles);
                            ABI_PopRegisters(SavedRegs);
                            RET();
                        }

                        SetJumpTarget(executedDone);
                    }
                }
            }
        }

        // Reload the cached CPSR after an interpreter call.
        if (interpreted)
            LoadCPSR();
    }

    RegCache.Flush();

    // Block epilogue.
    SaveCPSR();
    ADD(W0, RCycles, ConstantCycles);
    ABI_PopRegisters(SavedRegs);
    RET();

    FlushIcache();

    //printf("finished\n");

    return blockEntry;
}
|
||||
|
||||
void Compiler::Reset()
|
||||
{
|
||||
SetCodePtr(0);
|
||||
|
||||
const u32 brk_0 = 0xD4200000;
|
||||
|
||||
for (int i = 0; i < JitMemUseableSize / 4; i++)
|
||||
*(((u32*)GetRWPtr()) + i) = brk_0;
|
||||
}
|
||||
|
||||
void Compiler::Comp_AddCycles_C(bool nonConst)
|
||||
{
|
||||
s32 cycles = Num ?
|
||||
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
|
||||
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
|
||||
|
||||
if (!nonConst && !CurInstr.Info.Branches())
|
||||
ConstantCycles += cycles;
|
||||
else
|
||||
ADD(RCycles, RCycles, cycles);
|
||||
}
|
||||
|
||||
// Accounts for code-fetch cycles plus `numI` internal cycles. Conditional
// ARM instructions add at run time; everything else goes into
// ConstantCycles.
void Compiler::Comp_AddCycles_CI(u32 numI)
{
    s32 codeCycles;
    if (Num)
        codeCycles = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
    else if (R15 & 0x2)
        codeCycles = 0;
    else
        codeCycles = CurInstr.CodeCycles;

    s32 cycles = codeCycles + numI;

    const bool conditionalARM = !Thumb && CurInstr.Cond() < 0xE;
    if (conditionalARM)
        ADD(RCycles, RCycles, cycles);
    else
        ConstantCycles += cycles;
}
|
||||
|
||||
// Accounts for code-fetch cycles, `c` constant internal cycles and a
// run-time number of internal cycles held in `numI` (with `shift` applied).
//
// The register part is always added to RCycles at run time. The constant
// part (code cycles + c) is added at run time for conditional ARM
// instructions and folded into ConstantCycles otherwise.
void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
{
    s32 cycles = (Num ?
        NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
        : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;

    ADD(RCycles, RCycles, numI, shift);

    if (Thumb || CurInstr.Cond() >= 0xE)
        // Fold the whole constant part, not just `c`: the code cycles were
        // previously dropped on this path, unlike the run-time path below
        // and Comp_AddCycles_CI(u32).
        ConstantCycles += cycles;
    else
        ADD(RCycles, RCycles, cycles);
}
|
||||
|
||||
void Compiler::Comp_AddCycles_CDI()
|
||||
{
|
||||
if (Num == 0)
|
||||
Comp_AddCycles_CD();
|
||||
else
|
||||
{
|
||||
IrregularCycles = true;
|
||||
|
||||
s32 cycles;
|
||||
|
||||
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
|
||||
s32 numD = CurInstr.DataCycles;
|
||||
|
||||
if (CurInstr.DataRegion == 0x02) // mainRAM
|
||||
{
|
||||
if (CodeRegion == 0x02)
|
||||
cycles = numC + numD;
|
||||
else
|
||||
{
|
||||
numC++;
|
||||
cycles = std::max(numC + numD - 3, std::max(numC, numD));
|
||||
}
|
||||
}
|
||||
else if (CodeRegion == 0x02)
|
||||
{
|
||||
numD++;
|
||||
cycles = std::max(numC + numD - 3, std::max(numC, numD));
|
||||
}
|
||||
else
|
||||
{
|
||||
cycles = numC + numD + 1;
|
||||
}
|
||||
|
||||
if (!Thumb && CurInstr.Cond() < 0xE)
|
||||
ADD(RCycles, RCycles, cycles);
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::Comp_AddCycles_CD()
|
||||
{
|
||||
u32 cycles = 0;
|
||||
if (Num == 0)
|
||||
{
|
||||
s32 numC = (R15 & 0x2) ? 0 : CurInstr.CodeCycles;
|
||||
s32 numD = CurInstr.DataCycles;
|
||||
|
||||
//if (DataRegion != CodeRegion)
|
||||
cycles = std::max(numC + numD - 6, std::max(numC, numD));
|
||||
|
||||
IrregularCycles = cycles != numC;
|
||||
}
|
||||
else
|
||||
{
|
||||
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
|
||||
s32 numD = CurInstr.DataCycles;
|
||||
|
||||
if (CurInstr.DataRegion == 0x02)
|
||||
{
|
||||
if (CodeRegion == 0x02)
|
||||
cycles += numC + numD;
|
||||
else
|
||||
cycles += std::max(numC + numD - 3, std::max(numC, numD));
|
||||
}
|
||||
else if (CodeRegion == 0x02)
|
||||
{
|
||||
cycles += std::max(numC + numD - 3, std::max(numC, numD));
|
||||
}
|
||||
else
|
||||
{
|
||||
cycles += numC + numD;
|
||||
}
|
||||
|
||||
IrregularCycles = true;
|
||||
}
|
||||
|
||||
if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles)
|
||||
ADD(RCycles, RCycles, cycles);
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
}
|
||||
|
||||
}
|
234
src/ARMJIT_A64/ARMJIT_Compiler.h
Normal file
234
src/ARMJIT_A64/ARMJIT_Compiler.h
Normal file
@ -0,0 +1,234 @@
|
||||
#ifndef ARMJIT_COMPILER_H
|
||||
#define ARMJIT_COMPILER_H
|
||||
|
||||
#include "../ARM.h"
|
||||
#include "../ARMJIT.h"
|
||||
|
||||
#include "../dolphin/Arm64Emitter.h"
|
||||
|
||||
#include "../ARMJIT_Internal.h"
|
||||
#include "../ARMJIT_RegisterCache.h"
|
||||
|
||||
namespace ARMJIT
|
||||
{
|
||||
|
||||
const Arm64Gen::ARM64Reg RCPSR = Arm64Gen::W27;
|
||||
const Arm64Gen::ARM64Reg RCycles = Arm64Gen::W28;
|
||||
const Arm64Gen::ARM64Reg RCPU = Arm64Gen::X29;
|
||||
|
||||
struct Op2
|
||||
{
|
||||
Op2()
|
||||
{}
|
||||
|
||||
Op2(Arm64Gen::ARM64Reg rm) : IsImm(false)
|
||||
{
|
||||
Reg.Rm = rm;
|
||||
Reg.ShiftType = Arm64Gen::ST_LSL;
|
||||
Reg.ShiftAmount = 0;
|
||||
}
|
||||
|
||||
Op2(u32 imm) : IsImm(true), Imm(imm)
|
||||
{}
|
||||
|
||||
Op2(Arm64Gen::ARM64Reg rm, Arm64Gen::ShiftType st, int amount) : IsImm(false)
|
||||
{
|
||||
Reg.Rm = rm;
|
||||
Reg.ShiftType = st;
|
||||
Reg.ShiftAmount = amount;
|
||||
}
|
||||
|
||||
Arm64Gen::ArithOption ToArithOption()
|
||||
{
|
||||
assert(!IsImm);
|
||||
return Arm64Gen::ArithOption(Reg.Rm, Reg.ShiftType, Reg.ShiftAmount);
|
||||
}
|
||||
|
||||
bool IsSimpleReg()
|
||||
{ return !IsImm && !Reg.ShiftAmount && Reg.ShiftType == Arm64Gen::ST_LSL; }
|
||||
bool ImmFits12Bit()
|
||||
{ return IsImm && (Imm & 0xFFF == Imm); }
|
||||
bool IsZero()
|
||||
{ return IsImm && !Imm; }
|
||||
|
||||
bool IsImm;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
Arm64Gen::ARM64Reg Rm;
|
||||
Arm64Gen::ShiftType ShiftType;
|
||||
int ShiftAmount;
|
||||
} Reg;
|
||||
u32 Imm;
|
||||
};
|
||||
};
|
||||
|
||||
class Compiler : Arm64Gen::ARM64XEmitter
|
||||
{
|
||||
public:
|
||||
typedef void (Compiler::*CompileFunc)();
|
||||
|
||||
Compiler();
|
||||
~Compiler();
|
||||
|
||||
Arm64Gen::ARM64Reg MapReg(int reg)
|
||||
{
|
||||
assert(RegCache.Mapping[reg] != Arm64Gen::INVALID_REG);
|
||||
return RegCache.Mapping[reg];
|
||||
}
|
||||
|
||||
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
|
||||
|
||||
bool CanCompile(bool thumb, u16 kind);
|
||||
|
||||
bool FlagsNZNeeded()
|
||||
{
|
||||
return CurInstr.SetFlags & 0xC;
|
||||
}
|
||||
|
||||
void Reset();
|
||||
|
||||
void Comp_AddCycles_C(bool forceNonConst = false);
|
||||
void Comp_AddCycles_CI(u32 numI);
|
||||
void Comp_AddCycles_CI(u32 c, Arm64Gen::ARM64Reg numI, Arm64Gen::ArithOption shift);
|
||||
void Comp_AddCycles_CD();
|
||||
void Comp_AddCycles_CDI();
|
||||
|
||||
void MovePC();
|
||||
|
||||
void LoadReg(int reg, Arm64Gen::ARM64Reg nativeReg);
|
||||
void SaveReg(int reg, Arm64Gen::ARM64Reg nativeReg);
|
||||
|
||||
void LoadCPSR();
|
||||
void SaveCPSR(bool markClean = true);
|
||||
|
||||
void A_Comp_ALUTriOp();
|
||||
void A_Comp_ALUMovOp();
|
||||
void A_Comp_ALUCmpOp();
|
||||
|
||||
void A_Comp_Mul();
|
||||
void A_Comp_Mul_Long();
|
||||
|
||||
void A_Comp_Clz();
|
||||
|
||||
void A_Comp_MemWB();
|
||||
void A_Comp_MemHD();
|
||||
|
||||
void A_Comp_LDM_STM();
|
||||
|
||||
void A_Comp_BranchImm();
|
||||
void A_Comp_BranchXchangeReg();
|
||||
|
||||
|
||||
void T_Comp_ShiftImm();
|
||||
void T_Comp_AddSub_();
|
||||
void T_Comp_ALUImm8();
|
||||
void T_Comp_ALU();
|
||||
void T_Comp_ALU_HiReg();
|
||||
void T_Comp_AddSP();
|
||||
void T_Comp_RelAddr();
|
||||
|
||||
void T_Comp_MemReg();
|
||||
void T_Comp_MemImm();
|
||||
void T_Comp_MemRegHalf();
|
||||
void T_Comp_MemImmHalf();
|
||||
void T_Comp_LoadPCRel();
|
||||
void T_Comp_MemSPRel();
|
||||
|
||||
void T_Comp_LDMIA_STMIA();
|
||||
void T_Comp_PUSH_POP();
|
||||
|
||||
void T_Comp_BCOND();
|
||||
void T_Comp_B();
|
||||
void T_Comp_BranchXchangeReg();
|
||||
void T_Comp_BL_LONG_1();
|
||||
void T_Comp_BL_LONG_2();
|
||||
void T_Comp_BL_Merged();
|
||||
|
||||
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
|
||||
|
||||
void Comp_Mul_Mla(bool S, bool mla, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rm, Arm64Gen::ARM64Reg rs, Arm64Gen::ARM64Reg rn);
|
||||
|
||||
void Comp_Compare(int op, Arm64Gen::ARM64Reg rn, Op2 op2);
|
||||
void Comp_Logical(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
|
||||
void Comp_Arithmetic(int op, bool S, Arm64Gen::ARM64Reg rd, Arm64Gen::ARM64Reg rn, Op2 op2);
|
||||
|
||||
void Comp_RetriveFlags(bool retriveCV);
|
||||
|
||||
Arm64Gen::FixupBranch CheckCondition(u32 cond);
|
||||
|
||||
void Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR = false);
|
||||
void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
|
||||
|
||||
void A_Comp_GetOp2(bool S, Op2& op2);
|
||||
|
||||
void Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, Arm64Gen::ARM64Reg tmp = Arm64Gen::W0);
|
||||
void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
|
||||
|
||||
void Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
|
||||
enum
|
||||
{
|
||||
memop_Writeback = 1 << 0,
|
||||
memop_Post = 1 << 1,
|
||||
memop_SignExtend = 1 << 2,
|
||||
memop_Store = 1 << 3,
|
||||
memop_SubtractOffset = 1 << 4
|
||||
};
|
||||
void Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags);
|
||||
|
||||
void* Gen_MemoryRoutine9(int size, bool store);
|
||||
|
||||
void* Gen_MemoryRoutine9Seq(bool store, bool preinc);
|
||||
void* Gen_MemoryRoutine7Seq(bool store, bool preinc);
|
||||
|
||||
// 0 = switch mode, 1 = stay arm, 2 = stay thumb
|
||||
void* Gen_JumpTo9(int kind);
|
||||
void* Gen_JumpTo7(int kind);
|
||||
|
||||
void Comp_BranchSpecialBehaviour();
|
||||
|
||||
bool Exit;
|
||||
|
||||
FetchedInstr CurInstr;
|
||||
bool Thumb;
|
||||
u32 R15;
|
||||
u32 Num;
|
||||
ARM* CurCPU;
|
||||
u32 ConstantCycles;
|
||||
u32 CodeRegion;
|
||||
|
||||
BitSet32 SavedRegs;
|
||||
|
||||
u32 JitMemUseableSize;
|
||||
|
||||
void* ReadBanked, *WriteBanked;
|
||||
|
||||
// [size][store]
|
||||
void* MemFunc9[3][2];
|
||||
void* MemFunc7[3][2];
|
||||
|
||||
// [store][pre increment]
|
||||
void* MemFuncsSeq9[2][2];
|
||||
// "[code in main ram]
|
||||
void* MemFuncsSeq7[2][2];
|
||||
|
||||
void* JumpToFuncs9[3];
|
||||
void* JumpToFuncs7[3];
|
||||
|
||||
RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache;
|
||||
|
||||
bool CPSRDirty = false;
|
||||
|
||||
bool IrregularCycles = false;
|
||||
|
||||
#ifdef __SWITCH__
|
||||
void* JitRWBase;
|
||||
void* JitRWStart;
|
||||
void* JitRXStart;
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
848
src/ARMJIT_A64/ARMJIT_LoadStore.cpp
Normal file
848
src/ARMJIT_A64/ARMJIT_LoadStore.cpp
Normal file
@ -0,0 +1,848 @@
|
||||
#include "ARMJIT_Compiler.h"
|
||||
|
||||
#include "../Config.h"
|
||||
|
||||
using namespace Arm64Gen;
|
||||
|
||||
namespace ARMJIT
|
||||
{
|
||||
|
||||
// W0 - address
|
||||
// (if store) W1 - value to store
|
||||
// W2 - code cycles
|
||||
void* Compiler::Gen_MemoryRoutine9(int size, bool store)
|
||||
{
|
||||
AlignCode16();
|
||||
void* res = GetRXPtr();
|
||||
|
||||
u32 addressMask;
|
||||
switch (size)
|
||||
{
|
||||
case 32: addressMask = ~3; break;
|
||||
case 16: addressMask = ~1; break;
|
||||
case 8: addressMask = ~0; break;
|
||||
}
|
||||
|
||||
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, DTCMBase));
|
||||
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMSize));
|
||||
SUB(W3, W0, W3);
|
||||
CMP(W3, W4);
|
||||
FixupBranch insideDTCM = B(CC_LO);
|
||||
|
||||
UBFX(W4, W0, 24, 8);
|
||||
CMP(W4, 0x02);
|
||||
FixupBranch outsideMainRAM = B(CC_NEQ);
|
||||
ANDI2R(W3, W0, addressMask & (MAIN_RAM_SIZE - 1));
|
||||
MOVP2R(X4, NDS::MainRAM);
|
||||
if (!store && size == 32)
|
||||
{
|
||||
LDR(W3, X3, X4);
|
||||
ANDI2R(W0, W0, 3);
|
||||
LSL(W0, W0, 3);
|
||||
RORV(W0, W3, W0);
|
||||
}
|
||||
else if (store)
|
||||
STRGeneric(size, W1, X3, X4);
|
||||
else
|
||||
LDRGeneric(size, false, W0, X3, X4);
|
||||
RET();
|
||||
|
||||
SetJumpTarget(outsideMainRAM);
|
||||
|
||||
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
|
||||
CMP(W0, W3);
|
||||
FixupBranch insideITCM = B(CC_LO);
|
||||
|
||||
if (store)
|
||||
{
|
||||
if (size > 8)
|
||||
ANDI2R(W0, W0, addressMask);
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case 32: QuickTailCall(X4, NDS::ARM9Write32); break;
|
||||
case 16: QuickTailCall(X4, NDS::ARM9Write16); break;
|
||||
case 8: QuickTailCall(X4, NDS::ARM9Write8); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (size == 32)
|
||||
ABI_PushRegisters({0, 30});
|
||||
if (size > 8)
|
||||
ANDI2R(W0, W0, addressMask);
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case 32: QuickCallFunction(X4, NDS::ARM9Read32); break;
|
||||
case 16: QuickTailCall (X4, NDS::ARM9Read16); break;
|
||||
case 8: QuickTailCall (X4, NDS::ARM9Read8 ); break;
|
||||
}
|
||||
if (size == 32)
|
||||
{
|
||||
ABI_PopRegisters({1, 30});
|
||||
ANDI2R(W1, W1, 3);
|
||||
LSL(W1, W1, 3);
|
||||
RORV(W0, W0, W1);
|
||||
RET();
|
||||
}
|
||||
}
|
||||
|
||||
SetJumpTarget(insideDTCM);
|
||||
ANDI2R(W3, W3, 0x3FFF & addressMask);
|
||||
ADDI2R(W3, W3, offsetof(ARMv5, DTCM), W4);
|
||||
if (!store && size == 32)
|
||||
{
|
||||
ANDI2R(W4, W0, 3);
|
||||
LDR(W0, RCPU, W3);
|
||||
LSL(W4, W4, 3);
|
||||
RORV(W0, W0, W4);
|
||||
}
|
||||
else if (store)
|
||||
STRGeneric(size, W1, RCPU, W3);
|
||||
else
|
||||
LDRGeneric(size, false, W0, RCPU, W3);
|
||||
|
||||
RET();
|
||||
|
||||
SetJumpTarget(insideITCM);
|
||||
ANDI2R(W3, W0, 0x7FFF & addressMask);
|
||||
if (store)
|
||||
{
|
||||
LSR(W0, W3, 8);
|
||||
ADDI2R(W0, W0, ExeMemRegionOffsets[exeMem_ITCM], W4);
|
||||
MOVP2R(X4, CodeRanges);
|
||||
ADD(X4, X4, X0, ArithOption(X0, ST_LSL, 4));
|
||||
static_assert(sizeof(AddressRange) == 16);
|
||||
LDR(INDEX_UNSIGNED, W4, X4, offsetof(AddressRange, Blocks.Length));
|
||||
FixupBranch null = CBZ(W4);
|
||||
ABI_PushRegisters({1, 3, 30});
|
||||
QuickCallFunction(X4, InvalidateByAddr);
|
||||
ABI_PopRegisters({1, 3, 30});
|
||||
SetJumpTarget(null);
|
||||
}
|
||||
ADDI2R(W3, W3, offsetof(ARMv5, ITCM), W4);
|
||||
if (!store && size == 32)
|
||||
{
|
||||
ANDI2R(W4, W0, 3);
|
||||
LDR(W0, RCPU, W3);
|
||||
LSL(W4, W4, 3);
|
||||
RORV(W0, W0, W4);
|
||||
}
|
||||
else if (store)
|
||||
STRGeneric(size, W1, RCPU, W3);
|
||||
else
|
||||
LDRGeneric(size, false, W0, RCPU, W3);
|
||||
RET();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
W0 - base address
|
||||
X1 - stack space
|
||||
W2 - values count
|
||||
*/
|
||||
void* Compiler::Gen_MemoryRoutine9Seq(bool store, bool preinc)
|
||||
{
|
||||
AlignCode16();
|
||||
void* res = GetRXPtr();
|
||||
|
||||
void* loopStart = GetRXPtr();
|
||||
SUB(W2, W2, 1);
|
||||
|
||||
if (preinc)
|
||||
ADD(W0, W0, 4);
|
||||
|
||||
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMBase));
|
||||
LDR(INDEX_UNSIGNED, W5, RCPU, offsetof(ARMv5, DTCMSize));
|
||||
SUB(W4, W0, W4);
|
||||
CMP(W4, W5);
|
||||
FixupBranch insideDTCM = B(CC_LO);
|
||||
|
||||
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, ITCMSize));
|
||||
CMP(W0, W4);
|
||||
FixupBranch insideITCM = B(CC_LO);
|
||||
|
||||
ABI_PushRegisters({0, 1, 2, 30}); // TODO: move SP only once
|
||||
if (store)
|
||||
{
|
||||
LDR(X1, X1, ArithOption(X2, true));
|
||||
QuickCallFunction(X4, NDS::ARM9Write32);
|
||||
|
||||
ABI_PopRegisters({0, 1, 2, 30});
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCallFunction(X4, NDS::ARM9Read32);
|
||||
MOV(W4, W0);
|
||||
|
||||
ABI_PopRegisters({0, 1, 2, 30});
|
||||
|
||||
STR(X4, X1, ArithOption(X2, true));
|
||||
}
|
||||
|
||||
if (!preinc)
|
||||
ADD(W0, W0, 4);
|
||||
CBNZ(W2, loopStart);
|
||||
RET();
|
||||
|
||||
SetJumpTarget(insideDTCM);
|
||||
|
||||
ANDI2R(W4, W4, ~3 & 0x3FFF);
|
||||
ADDI2R(X4, X4, offsetof(ARMv5, DTCM));
|
||||
if (store)
|
||||
{
|
||||
LDR(X5, X1, ArithOption(X2, true));
|
||||
STR(W5, RCPU, X4);
|
||||
}
|
||||
else
|
||||
{
|
||||
LDR(W5, RCPU, X4);
|
||||
STR(X5, X1, ArithOption(X2, true));
|
||||
}
|
||||
|
||||
if (!preinc)
|
||||
ADD(W0, W0, 4);
|
||||
CBNZ(W2, loopStart);
|
||||
RET();
|
||||
|
||||
SetJumpTarget(insideITCM);
|
||||
|
||||
ANDI2R(W4, W0, ~3 & 0x7FFF);
|
||||
|
||||
if (store)
|
||||
{
|
||||
LSR(W6, W4, 8);
|
||||
ADDI2R(W6, W6, ExeMemRegionOffsets[exeMem_ITCM], W5);
|
||||
MOVP2R(X5, CodeRanges);
|
||||
ADD(X5, X5, X6, ArithOption(X6, ST_LSL, 4));
|
||||
static_assert(sizeof(AddressRange) == 16);
|
||||
LDR(INDEX_UNSIGNED, W5, X5, offsetof(AddressRange, Blocks.Length));
|
||||
FixupBranch null = CBZ(W5);
|
||||
ABI_PushRegisters({0, 1, 2, 4, 30});
|
||||
MOV(W0, W6);
|
||||
QuickCallFunction(X5, InvalidateByAddr);
|
||||
ABI_PopRegisters({0, 1, 2, 4, 30});
|
||||
SetJumpTarget(null);
|
||||
}
|
||||
|
||||
ADDI2R(W4, W4, offsetof(ARMv5, ITCM), W5);
|
||||
if (store)
|
||||
{
|
||||
LDR(X5, X1, ArithOption(X2, true));
|
||||
STR(W5, RCPU, X4);
|
||||
}
|
||||
else
|
||||
{
|
||||
LDR(W5, RCPU, X4);
|
||||
STR(X5, X1, ArithOption(X2, true));
|
||||
}
|
||||
|
||||
if (!preinc)
|
||||
ADD(W0, W0, 4);
|
||||
CBNZ(W2, loopStart);
|
||||
RET();
|
||||
return res;
|
||||
}
|
||||
|
||||
void* Compiler::Gen_MemoryRoutine7Seq(bool store, bool preinc)
|
||||
{
|
||||
AlignCode16();
|
||||
void* res = GetRXPtr();
|
||||
|
||||
void* loopStart = GetRXPtr();
|
||||
SUB(W2, W2, 1);
|
||||
|
||||
if (preinc)
|
||||
ADD(W0, W0, 4);
|
||||
|
||||
ABI_PushRegisters({0, 1, 2, 30});
|
||||
if (store)
|
||||
{
|
||||
LDR(X1, X1, ArithOption(X2, true));
|
||||
QuickCallFunction(X4, NDS::ARM7Write32);
|
||||
ABI_PopRegisters({0, 1, 2, 30});
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCallFunction(X4, NDS::ARM7Read32);
|
||||
MOV(W4, W0);
|
||||
ABI_PopRegisters({0, 1, 2, 30});
|
||||
STR(X4, X1, ArithOption(X2, true));
|
||||
}
|
||||
|
||||
if (!preinc)
|
||||
ADD(W0, W0, 4);
|
||||
CBNZ(W2, loopStart);
|
||||
RET();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
|
||||
{
|
||||
u32 val;
|
||||
// make sure arm7 bios is accessible
|
||||
u32 tmpR15 = CurCPU->R[15];
|
||||
CurCPU->R[15] = R15;
|
||||
if (size == 32)
|
||||
{
|
||||
CurCPU->DataRead32(addr & ~0x3, &val);
|
||||
val = ROR(val, (addr & 0x3) << 3);
|
||||
}
|
||||
else if (size == 16)
|
||||
{
|
||||
CurCPU->DataRead16(addr & ~0x1, &val);
|
||||
if (signExtend)
|
||||
val = ((s32)val << 16) >> 16;
|
||||
}
|
||||
else
|
||||
{
|
||||
CurCPU->DataRead8(addr, &val);
|
||||
if (signExtend)
|
||||
val = ((s32)val << 24) >> 24;
|
||||
}
|
||||
CurCPU->R[15] = tmpR15;
|
||||
|
||||
MOVI2R(MapReg(rd), val);
|
||||
|
||||
if (Thumb || CurInstr.Cond() == 0xE)
|
||||
RegCache.PutLiteral(rd, val);
|
||||
}
|
||||
|
||||
void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
|
||||
{
|
||||
u32 addressMask = ~0;
|
||||
if (size == 32)
|
||||
addressMask = ~3;
|
||||
if (size == 16)
|
||||
addressMask = ~1;
|
||||
|
||||
if (flags & memop_Store)
|
||||
Comp_AddCycles_CD();
|
||||
else
|
||||
Comp_AddCycles_CDI();
|
||||
|
||||
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
|
||||
{
|
||||
u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||
u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
|
||||
|
||||
if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
|
||||
{
|
||||
Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
ARM64Reg rdMapped = MapReg(rd);
|
||||
ARM64Reg rnMapped = MapReg(rn);
|
||||
|
||||
bool inlinePreparation = Num == 1;
|
||||
u32 constLocalROR32 = 4;
|
||||
|
||||
void* memFunc = Num == 0
|
||||
? MemFunc9[size >> 4][!!(flags & memop_Store)]
|
||||
: MemFunc7[size >> 4][!!((flags & memop_Store))];
|
||||
|
||||
if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && offset.IsImm && RegCache.IsLiteral(rn))
|
||||
{
|
||||
u32 addr = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||
|
||||
NDS::MemRegion region;
|
||||
region.Mem = NULL;
|
||||
if (Num == 0)
|
||||
{
|
||||
ARMv5* cpu5 = (ARMv5*)CurCPU;
|
||||
|
||||
// stupid dtcm...
|
||||
if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
|
||||
{
|
||||
region.Mem = cpu5->DTCM;
|
||||
region.Mask = 0x3FFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
NDS::ARM9GetMemRegion(addr, flags & memop_Store, ®ion);
|
||||
}
|
||||
}
|
||||
else
|
||||
NDS::ARM7GetMemRegion(addr, flags & memop_Store, ®ion);
|
||||
|
||||
if (region.Mem != NULL)
|
||||
{
|
||||
void* ptr = ®ion.Mem[addr & addressMask & region.Mask];
|
||||
|
||||
MOVP2R(X0, ptr);
|
||||
if (flags & memop_Store)
|
||||
STRGeneric(size, INDEX_UNSIGNED, rdMapped, X0, 0);
|
||||
else
|
||||
{
|
||||
LDRGeneric(size, flags & memop_SignExtend, INDEX_UNSIGNED, rdMapped, X0, 0);
|
||||
if (size == 32 && addr & ~0x3)
|
||||
ROR_(rdMapped, rdMapped, (addr & 0x3) << 3);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
|
||||
if (specialFunc)
|
||||
{
|
||||
memFunc = specialFunc;
|
||||
inlinePreparation = true;
|
||||
constLocalROR32 = addr & 0x3;
|
||||
}
|
||||
}
|
||||
|
||||
ARM64Reg finalAddr = W0;
|
||||
if (flags & memop_Post)
|
||||
{
|
||||
finalAddr = rnMapped;
|
||||
MOV(W0, rnMapped);
|
||||
}
|
||||
|
||||
if (flags & memop_Store)
|
||||
MOV(W1, rdMapped);
|
||||
|
||||
if (!offset.IsImm)
|
||||
Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
|
||||
// offset might become an immediate
|
||||
if (offset.IsImm)
|
||||
{
|
||||
if (flags & memop_SubtractOffset)
|
||||
SUB(finalAddr, rnMapped, offset.Imm);
|
||||
else
|
||||
ADD(finalAddr, rnMapped, offset.Imm);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (offset.Reg.ShiftType == ST_ROR)
|
||||
{
|
||||
ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
|
||||
offset = Op2(W0);
|
||||
}
|
||||
|
||||
if (flags & memop_SubtractOffset)
|
||||
SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
|
||||
else
|
||||
ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
|
||||
}
|
||||
|
||||
if (!(flags & memop_Post) && (flags & memop_Writeback))
|
||||
MOV(rnMapped, W0);
|
||||
|
||||
if (inlinePreparation)
|
||||
{
|
||||
if (size == 32 && !(flags & memop_Store) && constLocalROR32 == 4)
|
||||
ANDI2R(rdMapped, W0, 3);
|
||||
if (size > 8)
|
||||
ANDI2R(W0, W0, addressMask);
|
||||
}
|
||||
QuickCallFunction(X2, memFunc);
|
||||
if (!(flags & memop_Store))
|
||||
{
|
||||
if (inlinePreparation && !(flags & memop_Store) && size == 32)
|
||||
{
|
||||
if (constLocalROR32 == 4)
|
||||
{
|
||||
LSL(rdMapped, rdMapped, 3);
|
||||
RORV(rdMapped, W0, rdMapped);
|
||||
}
|
||||
else if (constLocalROR32 > 0)
|
||||
ROR_(rdMapped, W0, constLocalROR32 << 3);
|
||||
else
|
||||
MOV(rdMapped, W0);
|
||||
}
|
||||
else if (flags & memop_SignExtend)
|
||||
{
|
||||
if (size == 16)
|
||||
SXTH(rdMapped, W0);
|
||||
else if (size == 8)
|
||||
SXTB(rdMapped, W0);
|
||||
else
|
||||
assert("What's wrong with you?");
|
||||
}
|
||||
else
|
||||
MOV(rdMapped, W0);
|
||||
|
||||
if (CurInstr.Info.Branches())
|
||||
{
|
||||
if (size < 32)
|
||||
printf("LDR size < 32 branching?\n");
|
||||
Comp_JumpTo(rdMapped, Num == 0, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_MemWB()
|
||||
{
|
||||
Op2 offset;
|
||||
if (CurInstr.Instr & (1 << 25))
|
||||
offset = Op2(MapReg(CurInstr.A_Reg(0)), (ShiftType)((CurInstr.Instr >> 5) & 0x3), (CurInstr.Instr >> 7) & 0x1F);
|
||||
else
|
||||
offset = Op2(CurInstr.Instr & 0xFFF);
|
||||
|
||||
bool load = CurInstr.Instr & (1 << 20);
|
||||
bool byte = CurInstr.Instr & (1 << 22);
|
||||
|
||||
int flags = 0;
|
||||
if (!load)
|
||||
flags |= memop_Store;
|
||||
if (!(CurInstr.Instr & (1 << 24)))
|
||||
flags |= memop_Post;
|
||||
if (CurInstr.Instr & (1 << 21))
|
||||
flags |= memop_Writeback;
|
||||
if (!(CurInstr.Instr & (1 << 23)))
|
||||
flags |= memop_SubtractOffset;
|
||||
|
||||
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, byte ? 8 : 32, flags);
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_MemHD()
|
||||
{
|
||||
bool load = CurInstr.Instr & (1 << 20);
|
||||
bool signExtend;
|
||||
int op = (CurInstr.Instr >> 5) & 0x3;
|
||||
int size;
|
||||
|
||||
if (load)
|
||||
{
|
||||
signExtend = op >= 2;
|
||||
size = op == 2 ? 8 : 16;
|
||||
}
|
||||
else
|
||||
{
|
||||
size = 16;
|
||||
signExtend = false;
|
||||
}
|
||||
|
||||
Op2 offset;
|
||||
if (CurInstr.Instr & (1 << 22))
|
||||
offset = Op2((CurInstr.Instr & 0xF) | ((CurInstr.Instr >> 4) & 0xF0));
|
||||
else
|
||||
offset = Op2(MapReg(CurInstr.A_Reg(0)));
|
||||
|
||||
int flags = 0;
|
||||
if (signExtend)
|
||||
flags |= memop_SignExtend;
|
||||
if (!load)
|
||||
flags |= memop_Store;
|
||||
if (!(CurInstr.Instr & (1 << 24)))
|
||||
flags |= memop_Post;
|
||||
if (!(CurInstr.Instr & (1 << 23)))
|
||||
flags |= memop_SubtractOffset;
|
||||
if (CurInstr.Instr & (1 << 21))
|
||||
flags |= memop_Writeback;
|
||||
|
||||
Comp_MemAccess(CurInstr.A_Reg(12), CurInstr.A_Reg(16), offset, size, flags);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_MemReg()
|
||||
{
|
||||
int op = (CurInstr.Instr >> 10) & 0x3;
|
||||
bool load = op & 0x2;
|
||||
bool byte = op & 0x1;
|
||||
|
||||
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3),
|
||||
Op2(MapReg(CurInstr.T_Reg(6))), byte ? 8 : 32, load ? 0 : memop_Store);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_MemImm()
|
||||
{
|
||||
int op = (CurInstr.Instr >> 11) & 0x3;
|
||||
bool load = op & 0x1;
|
||||
bool byte = op & 0x2;
|
||||
u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4);
|
||||
|
||||
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset),
|
||||
byte ? 8 : 32, load ? 0 : memop_Store);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_MemRegHalf()
|
||||
{
|
||||
int op = (CurInstr.Instr >> 10) & 0x3;
|
||||
bool load = op != 0;
|
||||
int size = op != 1 ? 16 : 8;
|
||||
bool signExtend = op & 1;
|
||||
|
||||
int flags = 0;
|
||||
if (signExtend)
|
||||
flags |= memop_SignExtend;
|
||||
if (!load)
|
||||
flags |= memop_Store;
|
||||
|
||||
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(MapReg(CurInstr.T_Reg(6))),
|
||||
size, flags);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_MemImmHalf()
|
||||
{
|
||||
u32 offset = (CurInstr.Instr >> 5) & 0x3E;
|
||||
bool load = CurInstr.Instr & (1 << 11);
|
||||
|
||||
Comp_MemAccess(CurInstr.T_Reg(0), CurInstr.T_Reg(3), Op2(offset), 16,
|
||||
load ? 0 : memop_Store);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_LoadPCRel()
|
||||
{
|
||||
u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
|
||||
|
||||
if (Config::JIT_LiteralOptimisations)
|
||||
{
|
||||
Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr);
|
||||
Comp_AddCycles_CDI();
|
||||
}
|
||||
else
|
||||
{
|
||||
bool negative = addr < R15;
|
||||
u32 abs = negative ? R15 - addr : addr - R15;
|
||||
Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(abs), 32, negative ? memop_SubtractOffset : 0);
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_MemSPRel()
|
||||
{
|
||||
u32 offset = (CurInstr.Instr & 0xFF) * 4;
|
||||
bool load = CurInstr.Instr & (1 << 11);
|
||||
|
||||
Comp_MemAccess(CurInstr.T_Reg(8), 13, Op2(offset), 32, load ? 0 : memop_Store);
|
||||
}
|
||||
|
||||
// Emits a block transfer (multiple registers loaded or stored in one go).
//
// rn        - guest base register
// regs      - set of guest registers to transfer
// store     - true for a store, false for a load
// preinc    - address is incremented before each access
// decrement - registers are transferred below the base address
// usermode  - registers 8..14 go through the ReadBanked / WriteBanked helpers
//
// The values are staged in 8-byte stack slots (the highest slot belongs to
// the lowest register) and moved by one of the MemFuncsSeq routines.
// Returns the signed number of bytes the base register has to be adjusted
// by for writeback; the adjustment itself is left to the caller.
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
{
    IrregularCycles = true;

    const int regsCount = regs.Count();

    if (regsCount == 0)
        return 0; // actually not the right behaviour TODO: fix me

    // Stack space for one 8-byte slot per register, rounded up to an even
    // number of slots.
    const int stackBytes = ((regsCount + 1) & ~1) * 8;

    auto isMapped = [this](int guestReg)
    {
        return RegCache.Mapping[guestReg] != INVALID_REG;
    };

    SUB(SP, SP, stackBytes);
    if (store)
    {
        Comp_AddCycles_CD();

        // W0 = low five bits of CPSR, set up before the ReadBanked calls
        if (usermode && (regs & BitSet16(0x7f00)))
            UBFX(W0, RCPSR, 0, 5);

        int slot = regsCount - 1;

        BitSet16::Iterator cur = regs.begin();
        while (cur != regs.end())
        {
            BitSet16::Iterator following = cur;
            following++;

            const int reg = *cur;

            if (usermode && reg >= 8 && reg < 15)
            {
                if (isMapped(reg))
                    MOV(W3, MapReg(reg));
                else
                    LoadReg(reg, W3);
                MOVI2R(W1, reg - 8);
                BL(ReadBanked);
                STR(INDEX_UNSIGNED, W3, SP, slot * 8);
            }
            else if (!usermode && following != regs.end())
            {
                // Two registers at once with a single store pair.
                const int regB = *following;
                ARM64Reg first = W3;
                ARM64Reg second = W4;

                if (isMapped(reg))
                    first = MapReg(reg);
                else
                    LoadReg(reg, W3);

                if (isMapped(regB))
                    second = MapReg(regB);
                else
                    LoadReg(regB, W4);

                STP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, slot * 8 - 8);

                // consume the second register of the pair
                slot--;
                cur++;
            }
            else if (isMapped(reg))
                STR(INDEX_UNSIGNED, MapReg(reg), SP, slot * 8);
            else
            {
                LoadReg(reg, W3);
                STR(INDEX_UNSIGNED, W3, SP, slot * 8);
            }
            slot--;
            cur++;
        }
    }

    // Arguments of the sequential routine: W0 = start address,
    // X1 = stack buffer, W2 = number of values.
    if (decrement)
    {
        SUB(W0, MapReg(rn), regsCount * 4);
        preinc = !preinc;
    }
    else
        MOV(W0, MapReg(rn));
    ADD(X1, SP, 0);
    MOVI2R(W2, regsCount);

    BL(Num ? MemFuncsSeq7[store][preinc] : MemFuncsSeq9[store][preinc]);

    if (!store)
    {
        Comp_AddCycles_CDI();

        // W0 = low five bits of CPSR, set up before the WriteBanked calls
        if (usermode && (regs & BitSet16(0x7f00)))
            UBFX(W0, RCPSR, 0, 5);

        int slot = regsCount - 1;
        BitSet16::Iterator cur = regs.begin();
        while (cur != regs.end())
        {
            BitSet16::Iterator following = cur;
            following++;

            const int reg = *cur;

            if (usermode && reg >= 8 && reg < 15)
            {
                LDR(INDEX_UNSIGNED, W3, SP, slot * 8);
                MOVI2R(W1, reg - 8);
                BL(WriteBanked);
                // A non-zero W4 after WriteBanked skips the local update.
                FixupBranch alreadyWritten = CBNZ(W4);
                if (isMapped(reg))
                {
                    MOV(MapReg(reg), W3);
                    RegCache.DirtyRegs |= 1 << reg;
                }
                else
                    SaveReg(reg, W3);
                SetJumpTarget(alreadyWritten);
            }
            else if (!usermode && following != regs.end())
            {
                // Two registers at once with a single load pair.
                const int regB = *following;
                ARM64Reg first = W3, second = W4;

                if (isMapped(reg))
                {
                    first = MapReg(reg);
                    if (reg != 15)
                        RegCache.DirtyRegs |= 1 << reg;
                }
                if (isMapped(regB))
                {
                    second = MapReg(regB);
                    if (regB != 15)
                        RegCache.DirtyRegs |= 1 << regB;
                }

                LDP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, slot * 8 - 8);

                // Registers that went through the scratch registers are
                // written back with SaveReg.
                if (first == W3)
                    SaveReg(reg, W3);
                if (second == W4)
                    SaveReg(regB, W4);

                // consume the second register of the pair
                cur++;
                slot--;
            }
            else if (isMapped(reg))
            {
                ARM64Reg hostReg = MapReg(reg);
                LDR(INDEX_UNSIGNED, hostReg, SP, slot * 8);

                if (reg != 15)
                    RegCache.DirtyRegs |= 1 << reg;
            }
            else
            {
                LDR(INDEX_UNSIGNED, W3, SP, slot * 8);
                SaveReg(reg, W3);
            }

            cur++;
            slot--;
        }
    }
    ADD(SP, SP, stackBytes);

    // A load that includes register 15 ends in a jump to the loaded value.
    if (!store && regs[15])
    {
        ARM64Reg target = MapReg(15);
        Comp_JumpTo(target, Num == 0, usermode);
    }

    const s32 transferred = regsCount * 4;
    return decrement ? -transferred : transferred;
}
|
||||
|
||||
void Compiler::A_Comp_LDM_STM()
|
||||
{
|
||||
BitSet16 regs(CurInstr.Instr & 0xFFFF);
|
||||
|
||||
bool load = CurInstr.Instr & (1 << 20);
|
||||
bool pre = CurInstr.Instr & (1 << 24);
|
||||
bool add = CurInstr.Instr & (1 << 23);
|
||||
bool writeback = CurInstr.Instr & (1 << 21);
|
||||
bool usermode = CurInstr.Instr & (1 << 22);
|
||||
|
||||
ARM64Reg rn = MapReg(CurInstr.A_Reg(16));
|
||||
|
||||
s32 offset = Comp_MemAccessBlock(CurInstr.A_Reg(16), regs, !load, pre, !add, usermode);
|
||||
|
||||
if (load && writeback && regs[CurInstr.A_Reg(16)])
|
||||
writeback = Num == 0
|
||||
? (!(regs & ~BitSet16(1 << CurInstr.A_Reg(16)))) || (regs & ~BitSet16((2 << CurInstr.A_Reg(16)) - 1))
|
||||
: false;
|
||||
if (writeback)
|
||||
{
|
||||
if (offset > 0)
|
||||
ADD(rn, rn, offset);
|
||||
else
|
||||
SUB(rn, rn, -offset);
|
||||
}
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_PUSH_POP()
|
||||
{
|
||||
bool load = CurInstr.Instr & (1 << 11);
|
||||
BitSet16 regs(CurInstr.Instr & 0xFF);
|
||||
if (CurInstr.Instr & (1 << 8))
|
||||
{
|
||||
if (load)
|
||||
regs[15] = true;
|
||||
else
|
||||
regs[14] = true;
|
||||
}
|
||||
|
||||
ARM64Reg sp = MapReg(13);
|
||||
s32 offset = Comp_MemAccessBlock(13, regs, !load, !load, !load, false);
|
||||
|
||||
if (offset > 0)
|
||||
ADD(sp, sp, offset);
|
||||
else
|
||||
SUB(sp, sp, -offset);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_LDMIA_STMIA()
|
||||
{
|
||||
BitSet16 regs(CurInstr.Instr & 0xFF);
|
||||
ARM64Reg rb = MapReg(CurInstr.T_Reg(8));
|
||||
bool load = CurInstr.Instr & (1 << 11);
|
||||
u32 regsCount = regs.Count();
|
||||
|
||||
s32 offset = Comp_MemAccessBlock(CurInstr.T_Reg(8), regs, !load, false, false, false);
|
||||
|
||||
if (!load || !regs[CurInstr.T_Reg(8)])
|
||||
{
|
||||
if (offset > 0)
|
||||
ADD(rb, rb, offset);
|
||||
else
|
||||
SUB(rb, rb, -offset);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -2,6 +2,8 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "Config.h"
|
||||
|
||||
namespace ARMInstrInfo
|
||||
{
|
||||
|
||||
@ -363,7 +365,11 @@ Info Decode(bool thumb, u32 num, u32 instr)
|
||||
res.SpecialKind = special_WriteMem;
|
||||
|
||||
if (res.Kind == ARMInstrInfo::tk_LDR_PCREL)
|
||||
{
|
||||
if (!Config::JIT_LiteralOptimisations)
|
||||
res.SrcRegs |= 1 << 15;
|
||||
res.SpecialKind = special_LoadLiteral;
|
||||
}
|
||||
|
||||
if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
|
||||
{
|
||||
@ -417,7 +423,6 @@ Info Decode(bool thumb, u32 num, u32 instr)
|
||||
u32 cp = ((instr >> 8) & 0xF);
|
||||
if ((num == 0 && cp != 15) || (num == 1 && cp != 14))
|
||||
{
|
||||
printf("happens\n");
|
||||
data = A_UNK;
|
||||
res.Kind = ak_UNK;
|
||||
}
|
||||
|
@ -60,10 +60,31 @@ if (ENABLE_JIT)
|
||||
ARMJIT_x64/ARMJIT_Branch.cpp
|
||||
|
||||
dolphin/CommonFuncs.cpp
|
||||
dolphin/x64ABI.cpp
|
||||
dolphin/x64CPUDetect.cpp
|
||||
dolphin/x64Emitter.cpp
|
||||
)
|
||||
|
||||
if (ARCHITECTURE STREQUAL x86_64)
|
||||
target_sources(core PRIVATE
|
||||
dolphin/x64ABI.cpp
|
||||
dolphin/x64CPUDetect.cpp
|
||||
dolphin/x64Emitter.cpp
|
||||
|
||||
ARMJIT_x64/ARMJIT_Compiler.cpp
|
||||
ARMJIT_x64/ARMJIT_ALU.cpp
|
||||
ARMJIT_x64/ARMJIT_LoadStore.cpp
|
||||
ARMJIT_x64/ARMJIT_Branch.cpp
|
||||
)
|
||||
endif()
|
||||
if (ARCHITECTURE STREQUAL ARM64)
|
||||
target_sources(core PRIVATE
|
||||
dolphin/Arm64Emitter.cpp
|
||||
dolphin/MathUtil.cpp
|
||||
|
||||
ARMJIT_A64/ARMJIT_Compiler.cpp
|
||||
ARMJIT_A64/ARMJIT_ALU.cpp
|
||||
ARMJIT_A64/ARMJIT_LoadStore.cpp
|
||||
ARMJIT_A64/ARMJIT_Branch.cpp
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
|
24
src/dolphin/Align.h
Normal file
24
src/dolphin/Align.h
Normal file
@ -0,0 +1,24 @@
|
||||
// This file is under the public domain.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Common
|
||||
{
|
||||
// Rounds `value` up to the next multiple of `size`; a value that already is
// a multiple is returned unchanged. T must be an unsigned type.
template <typename T>
constexpr T AlignUp(T value, size_t size)
{
  static_assert(std::is_unsigned<T>(), "T must be an unsigned value.");
  const auto remainder = value % size;
  const auto padding = (size - remainder) % size;
  return static_cast<T>(value + padding);
}
|
||||
|
||||
// Rounds `value` down to the previous multiple of `size`; a value that
// already is a multiple is returned unchanged. T must be an unsigned type.
template <typename T>
constexpr T AlignDown(T value, size_t size)
{
  static_assert(std::is_unsigned<T>(), "T must be an unsigned value.");
  const auto remainder = value % size;
  return static_cast<T>(value - remainder);
}
|
||||
|
||||
} // namespace Common
|
4466
src/dolphin/Arm64Emitter.cpp
Normal file
4466
src/dolphin/Arm64Emitter.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1152
src/dolphin/Arm64Emitter.h
Normal file
1152
src/dolphin/Arm64Emitter.h
Normal file
File diff suppressed because it is too large
Load Diff
27
src/dolphin/ArmCommon.h
Normal file
27
src/dolphin/ArmCommon.h
Normal file
@ -0,0 +1,27 @@
|
||||
// Copyright 2014 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "../types.h"
|
||||
|
||||
// Condition codes, numbered 0..14, plus two aliases for the carry conditions.
enum CCFlags
{
  CC_EQ = 0,      // Equal
  CC_NEQ = 1,     // Not equal
  CC_CS = 2,      // Carry Set
  CC_CC = 3,      // Carry Clear
  CC_MI = 4,      // Minus (Negative)
  CC_PL = 5,      // Plus
  CC_VS = 6,      // Overflow
  CC_VC = 7,      // No Overflow
  CC_HI = 8,      // Unsigned higher
  CC_LS = 9,      // Unsigned lower or same
  CC_GE = 10,     // Signed greater than or equal
  CC_LT = 11,     // Signed less than
  CC_GT = 12,     // Signed greater than
  CC_LE = 13,     // Signed less than or equal
  CC_AL = 14,     // Always (unconditional)
  CC_HS = CC_CS,  // Alias of CC_CS: unsigned higher or same
  CC_LO = CC_CC,  // Alias of CC_CC: unsigned lower
};
|
||||
const u32 NO_COND = 0xE0000000;
|
254
src/dolphin/BitUtils.h
Normal file
254
src/dolphin/BitUtils.h
Normal file
@ -0,0 +1,254 @@
|
||||
// Copyright 2017 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstring>
|
||||
#include <type_traits>
|
||||
|
||||
namespace Common
|
||||
{
|
||||
///
/// Computes how many bits an object of the given type occupies.
///
/// @tparam T  The type to measure.
///
/// @return sizeof(T) multiplied by CHAR_BIT.
///
template <typename T>
constexpr size_t BitSize() noexcept
{
  return CHAR_BIT * sizeof(T);
}
|
||||
|
||||
///
/// Reads a single bit of a value.
///
/// @param  src  The value to read from.
/// @param  bit  Index of the bit to read; bit 0 is the least significant one.
///
/// @tparam T    The type of the value.
///
/// @return 1 if the selected bit is set, 0 otherwise.
///
template <typename T>
constexpr T ExtractBit(const T src, const size_t bit) noexcept
{
  const T lowBit = static_cast<T>(1);
  return (src >> bit) & lowBit;
}
|
||||
|
||||
///
|
||||
/// Extracts a bit from a value.
|
||||
///
|
||||
/// @param src The value to extract a bit from.
|
||||
///
|
||||
/// @tparam bit The bit to extract.
|
||||
/// @tparam T The type of the value.
|
||||
///
|
||||
/// @return The extracted bit.
|
||||
///
|
||||
template <size_t bit, typename T>
|
||||
constexpr T ExtractBit(const T src) noexcept
|
||||
{
|
||||
static_assert(bit < BitSize<T>(), "Specified bit must be within T's bit width.");
|
||||
|
||||
return ExtractBit(src, bit);
|
||||
}
|
||||
|
||||
///
/// Extracts a range of bits from a value.
///
/// @param  src    The value to extract the bits from.
/// @param  begin  The beginning of the bit range. This is inclusive.
/// @param  end    The ending of the bit range. This is inclusive.
///
/// @tparam T      The type of the value.
/// @tparam Result The returned result type. This is the unsigned analog
///                of a signed type if a signed type is passed as T.
///
/// @return The bits [begin, end] of src, moved down so that bit `begin`
///         becomes bit 0. All bits above the range are zero.
///
/// @pre begin <= end, and both are smaller than the bit width of Result.
///
template <typename T, typename Result = std::make_unsigned_t<T>>
constexpr Result ExtractBits(const T src, const size_t begin, const size_t end) noexcept
{
  // The extraction is done with an explicit mask in a wide unsigned type.
  // Shifting the value up and down again only discards the upper bits when
  // the arithmetic happens at exactly the width of T, which integral
  // promotion does not guarantee for types narrower than int.
  using Wide = unsigned long long;

  const size_t width = end - begin + 1;
  const Wide shifted = static_cast<Wide>(static_cast<Result>(src)) >> begin;
  // A full-width range cannot be expressed as (1 << width) - 1.
  const Wide mask = width >= sizeof(Wide) * CHAR_BIT ? ~Wide{0} : (Wide{1} << width) - 1;

  return static_cast<Result>(shifted & mask);
}
|
||||
|
||||
///
|
||||
/// Extracts a range of bits from a value.
|
||||
///
|
||||
/// @param src The value to extract the bits from.
|
||||
///
|
||||
/// @tparam begin The beginning of the bit range. This is inclusive.
|
||||
/// @tparam end The ending of the bit range. This is inclusive.
|
||||
/// @tparam T The type of the value.
|
||||
/// @tparam Result The returned result type. This is the unsigned analog
|
||||
/// of a signed type if a signed type is passed as T.
|
||||
///
|
||||
/// @return The extracted bits.
|
||||
///
|
||||
template <size_t begin, size_t end, typename T, typename Result = std::make_unsigned_t<T>>
|
||||
constexpr Result ExtractBits(const T src) noexcept
|
||||
{
|
||||
static_assert(begin < end, "Beginning bit must be less than the ending bit.");
|
||||
static_assert(begin < BitSize<T>(), "Beginning bit is larger than T's bit width.");
|
||||
static_assert(end < BitSize<T>(), "Ending bit is larger than T's bit width.");
|
||||
|
||||
return ExtractBits<T, Result>(src, begin, end);
|
||||
}
|
||||
|
||||
///
|
||||
/// Rotates a value left (ROL).
|
||||
///
|
||||
/// @param value The value to rotate.
|
||||
/// @param amount The number of bits to rotate the value.
|
||||
/// @tparam T An unsigned type.
|
||||
///
|
||||
/// @return The rotated value.
|
||||
///
|
||||
template <typename T>
|
||||
constexpr T RotateLeft(const T value, size_t amount) noexcept
|
||||
{
|
||||
static_assert(std::is_unsigned<T>(), "Can only rotate unsigned types left.");
|
||||
|
||||
amount %= BitSize<T>();
|
||||
|
||||
if (amount == 0)
|
||||
return value;
|
||||
|
||||
return static_cast<T>((value << amount) | (value >> (BitSize<T>() - amount)));
|
||||
}
|
||||
|
||||
///
|
||||
/// Rotates a value right (ROR).
|
||||
///
|
||||
/// @param value The value to rotate.
|
||||
/// @param amount The number of bits to rotate the value.
|
||||
/// @tparam T An unsigned type.
|
||||
///
|
||||
/// @return The rotated value.
|
||||
///
|
||||
template <typename T>
|
||||
constexpr T RotateRight(const T value, size_t amount) noexcept
|
||||
{
|
||||
static_assert(std::is_unsigned<T>(), "Can only rotate unsigned types right.");
|
||||
|
||||
amount %= BitSize<T>();
|
||||
|
||||
if (amount == 0)
|
||||
return value;
|
||||
|
||||
return static_cast<T>((value >> amount) | (value << (BitSize<T>() - amount)));
|
||||
}
|
||||
|
||||
///
/// Checks whether a value is a contiguous low bit mask, i.e. has the form
/// 0b00...0011...11. All zeros and all ones both count as valid masks.
///
/// @param  mask  The mask value to test.
///
/// @tparam T     The type of the value; must be an unsigned integral type.
///
/// @return true if the mask is valid, false otherwise.
///
template <typename T>
constexpr bool IsValidLowMask(const T mask) noexcept
{
  static_assert(std::is_integral<T>::value, "Mask must be an integral type.");
  static_assert(std::is_unsigned<T>::value, "Signed masks can introduce hard to find bugs.");

  // Adding one to a low mask carries through every set bit, so the sum has
  // no bit in common with the mask. This is the counterpart of the
  // power-of-two test described at
  // https://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2
  // and needs no special case for either edge value.
  return ((mask + 1) & mask) == 0;
}
|
||||
|
||||
///
/// Reinterpret objects of one type as another by bit-casting between object representations.
///
/// @remark This is the example implementation of std::bit_cast which is to be included
///         in C++2a. See http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0476r2.html
///         for more details. The only difference is this variant is not constexpr,
///         as the mechanism for bit_cast requires a compiler built-in to have that quality.
///
/// @param source The source object to convert to another representation.
///
/// @tparam To   The type to reinterpret source as.
/// @tparam From The initial type representation of source.
///
/// @return The representation of type From as type To.
///
/// @pre Both To and From types must be the same size
/// @pre Both To and From types must satisfy the TriviallyCopyable concept.
///
template <typename To, typename From>
inline To BitCast(const From& source) noexcept
{
  static_assert(sizeof(From) == sizeof(To),
                "BitCast source and destination types must be equal in size.");
  static_assert(std::is_trivially_copyable<From>(),
                "BitCast source type must be trivially copyable.");
  static_assert(std::is_trivially_copyable<To>(),
                "BitCast destination type must be trivially copyable.");

  std::aligned_storage_t<sizeof(To), alignof(To)> storage;

  // Copy exactly sizeof(To) (== sizeof(From)) bytes. The storage type is only
  // guaranteed to be *at least* that large, so using its own size as the
  // length could read past the end of source.
  std::memcpy(&storage, &source, sizeof(To));
  return reinterpret_cast<To&>(storage);
}
|
||||
|
||||
// Proxy object wrapping a PtrType*. Reading it (conversion to T) and writing
// it (assignment from T) both go through std::memcpy on the pointed-to bytes.
template <typename T, typename PtrType>
class BitCastPtrType
{
  static_assert(std::is_trivially_copyable<PtrType>(),
                "BitCastPtr source type must be trivially copyable.");
  static_assert(std::is_trivially_copyable<T>(),
                "BitCastPtr destination type must be trivially copyable.");

public:
  explicit BitCastPtrType(PtrType* ptr) : m_ptr(ptr) {}

  // Stores source at the wrapped address. Only participates in overload
  // resolution when S is exactly T and the pointee is not const.
  template <typename S>
  inline std::enable_if_t<std::is_same<S, T>::value && !std::is_const<PtrType>::value>
  operator=(const S& source)
  {
    std::memcpy(m_ptr, &source, sizeof(S));
  }

  // Loads a T from the wrapped address.
  inline operator T() const
  {
    T loaded;
    std::memcpy(&loaded, m_ptr, sizeof(T));
    return loaded;
  }

private:
  PtrType* m_ptr;
};
|
||||
|
||||
// Provides an aliasing-safe alternative to reinterpret_cast'ing pointers to structs
|
||||
// Conversion constructor and operator= provided for a convenient syntax.
|
||||
// Usage: MyStruct s = BitCastPtr<MyStruct>(some_ptr);
|
||||
// BitCastPtr<MyStruct>(some_ptr) = s;
|
||||
template <typename T, typename PtrType>
|
||||
inline auto BitCastPtr(PtrType* ptr) noexcept -> BitCastPtrType<T, PtrType>
|
||||
{
|
||||
return BitCastPtrType<T, PtrType>{ptr};
|
||||
}
|
||||
|
||||
// Sets (bit_value == true) or clears (bit_value == false) the bit at index
// bit_number of value, in place. Bit 0 is the least significant bit.
template <typename T>
void SetBit(T& value, size_t bit_number, bool bit_value)
{
  static_assert(std::is_unsigned<T>(), "SetBit is only sane on unsigned types.");

  // Single-bit mask selecting the requested position.
  const T selector = static_cast<T>(T{1} << bit_number);

  value = bit_value ? static_cast<T>(value | selector) : static_cast<T>(value & ~selector);
}
|
||||
|
||||
} // namespace Common
|
@ -61,3 +61,15 @@
|
||||
{ \
|
||||
printf(fmt "\n", ## __VA_ARGS__); \
|
||||
} while (false)
|
||||
|
||||
// std::clamp was only added in C++17; provide a stand-in for older standards.
// MSVC keeps __cplusplus at 199711L unless /Zc:__cplusplus is passed, so its
// real language level (_MSVC_LANG) is checked as well. Otherwise a C++17 MSVC
// build would end up with two conflicting definitions of std::clamp.
#if __cplusplus < 201703L && !(defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
// cheat
namespace std
{
// Returns v limited to the range [lo, hi]: lo if v < lo, hi if v > hi,
// otherwise v itself.
template <typename T>
T clamp(const T& v, const T& lo, const T& hi)
{
  return v < lo ? lo : (v > hi ? hi : v);
}
}
#endif
|
13
src/dolphin/MathUtil.cpp
Normal file
13
src/dolphin/MathUtil.cpp
Normal file
@ -0,0 +1,13 @@
|
||||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "MathUtil.h"
|
||||
|
||||
#include <numeric>
|
||||
|
||||
// Calculate sum of a float list.
// Elements are added front to back into a single-precision accumulator that
// starts at 0.0f; an empty list therefore sums to 0.0f.
float MathFloatVectorSum(const std::vector<float>& Vec)
{
  float total = 0.0f;
  for (const float element : Vec)
    total = total + element;
  return total;
}
|
121
src/dolphin/MathUtil.h
Normal file
121
src/dolphin/MathUtil.h
Normal file
@ -0,0 +1,121 @@
|
||||
// Copyright 2008 Dolphin Emulator Project
|
||||
// Licensed under GPLv2+
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "Compat.h"
|
||||
|
||||
#include "../types.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
namespace MathUtil
|
||||
{
|
||||
// Circle constant tau (2 * pi).
constexpr double TAU = 6.2831853071795865;
|
||||
// pi, derived from TAU so the two constants cannot drift apart.
constexpr double PI = TAU / 2;
|
||||
|
||||
// Returns the sign of val relative to a value-initialized T: positive when
// T{} < val, negative when val < T{}, zero when neither holds.
// For built-in arithmetic types that is the int 1, -1 or 0.
template <typename T>
constexpr auto Sign(const T& val) -> decltype((T{} < val) - (val < T{}))
{
  const T zero{};
  return (zero < val) - (val < zero);
}
|
||||
|
||||
// Linear interpolation between x and y: returns x plus the fraction a of the
// distance from x to y. a is not restricted to [0, 1].
template <typename T, typename F>
constexpr auto Lerp(const T& x, const T& y, const F& a) -> decltype(x + (y - x) * a)
{
  const auto span = y - x;
  return x + span * a;
}
|
||||
|
||||
// Returns true if imm is a positive power of two.
// Zero and negative values are never powers of two.
template <typename T>
constexpr bool IsPow2(T imm)
{
  if (!(imm > 0))
    return false;

  // A power of two has exactly one bit set; subtracting one clears that bit
  // and sets all lower ones, so the two values share no bits.
  return ((imm - 1) & imm) == 0;
}
|
||||
|
||||
// Rounds value up to the nearest power of two; a value that already is a
// power of two is returned unchanged.
// NOTE(review): assuming u32 is an unsigned 32-bit type, an input of 0 and
// inputs above 0x80000000 wrap around and yield 0 - confirm callers never
// rely on those cases.
constexpr u32 NextPowerOf2(u32 value)
{
  // Step back by one so that exact powers of two map onto themselves.
  --value;

  // Smear the highest set bit into every lower position (shifts 1, 2, 4, 8, 16).
  for (int shift = 1; shift <= 16; shift *= 2)
    value |= value >> shift;

  // All bits below the top one are now set; adding one carries into the next
  // power of two.
  ++value;
  return value;
}
|
||||
|
||||
// Axis-aligned rectangle described by its four edge coordinates.
// All members are value-initialized by default.
template <class T>
struct Rectangle
{
  T left{};
  T top{};
  T right{};
  T bottom{};

  constexpr Rectangle() = default;

  constexpr Rectangle(T theLeft, T theTop, T theRight, T theBottom)
      : left(theLeft), top(theTop), right(theRight), bottom(theBottom)
  {
  }

  // Two rectangles are equal when all four edges match.
  constexpr bool operator==(const Rectangle& r) const
  {
    return left == r.left && top == r.top && right == r.right && bottom == r.bottom;
  }

  // Absolute horizontal / vertical extent. Computed with a comparison and a
  // subtraction in T rather than an unqualified abs() call: depending on the
  // headers in scope, abs() can resolve to the C function int abs(int) and
  // silently truncate floating-point or 64-bit extents.
  T GetWidth() const { return right < left ? left - right : right - left; }
  T GetHeight() const { return bottom < top ? top - bottom : bottom - top; }

  // If the rectangle is in a coordinate system with a lower-left origin, use
  // this Clamp. Horizontal edges are limited to [x1, x2]; vertical edges are
  // limited to [y2, y1], i.e. y1 is the upper bound here.
  void ClampLL(T x1, T y1, T x2, T y2)
  {
    left = std::clamp(left, x1, x2);
    right = std::clamp(right, x1, x2);
    top = std::clamp(top, y2, y1);
    bottom = std::clamp(bottom, y2, y1);
  }

  // If the rectangle is in a coordinate system with an upper-left origin,
  // use this Clamp. Horizontal edges are limited to [x1, x2]; vertical edges
  // are limited to [y1, y2].
  void ClampUL(T x1, T y1, T x2, T y2)
  {
    left = std::clamp(left, x1, x2);
    right = std::clamp(right, x1, x2);
    top = std::clamp(top, y1, y2);
    bottom = std::clamp(bottom, y1, y2);
  }
};
|
||||
|
||||
} // namespace MathUtil
|
||||
|
||||
// Returns the sum of all elements of the given list (defined in MathUtil.cpp).
float MathFloatVectorSum(const std::vector<float>&);
|
||||
|
||||
// Rounds down. 0 -> undefined
|
||||
inline int IntLog2(u64 val)
|
||||
{
|
||||
#if defined(__GNUC__)
|
||||
return 63 - __builtin_clzll(val);
|
||||
|
||||
#elif defined(_MSC_VER)
|
||||
unsigned long result = ULONG_MAX;
|
||||
_BitScanReverse64(&result, val);
|
||||
return result;
|
||||
|
||||
#else
|
||||
int result = -1;
|
||||
while (val != 0)
|
||||
{
|
||||
val >>= 1;
|
||||
++result;
|
||||
}
|
||||
return result;
|
||||
#endif
|
||||
}
|
Loading…
Reference in New Issue
Block a user