diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp new file mode 100644 index 0000000000..8c4f4f4ff4 --- /dev/null +++ b/Source/Core/Common/Arm64Emitter.cpp @@ -0,0 +1,1360 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Arm64Emitter.h" + +namespace Arm64Gen +{ + +void ARM64XEmitter::SetCodePtr(u8 *ptr) +{ + code = ptr; + startcode = code; + lastCacheFlushEnd = ptr; +} + +const u8 *ARM64XEmitter::GetCodePtr() const +{ + return code; +} + +u8 *ARM64XEmitter::GetWritableCodePtr() +{ + return code; +} + +void ARM64XEmitter::ReserveCodeSpace(u32 bytes) +{ + for (u32 i = 0; i < bytes/4; i++) + BRK(0); +} + +const u8 *ARM64XEmitter::AlignCode16() +{ + int c = int((u64)code & 15); + if (c) + ReserveCodeSpace(16-c); + return code; +} + +const u8 *ARM64XEmitter::AlignCodePage() +{ + int c = int((u64)code & 4095); + if (c) + ReserveCodeSpace(4096-c); + return code; +} + +void ARM64XEmitter::FlushIcache() +{ + FlushIcacheSection(lastCacheFlushEnd, code); + lastCacheFlushEnd = code; +} + +void ARM64XEmitter::FlushIcacheSection(u8 *start, u8 *end) +{ +#if defined(IOS) + // Header file says this is equivalent to: sys_icache_invalidate(start, end - start); + sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start); +#else +#ifdef __clang__ + __clear_cache(start, end); +#else + __builtin___clear_cache(start, end); +#endif +#endif +} + + + +// Exception generation +const u32 ExcEnc[][3] = { + {0, 0, 1}, // SVC + {0, 0, 2}, // HVC + {0, 0, 3}, // SMC + {1, 0, 0}, // BRK + {2, 0, 0}, // HLT + {5, 0, 1}, // DCPS1 + {5, 0, 2}, // DCPS2 + {5, 0, 3}, // DCPS3 +}; + +// Arithmetic generation +const u32 ArithEnc[] = { + 0x058, // ADD + 0x258, // SUB +}; + +// Conditional Select +const u32 CondSelectEnc[][2] = { + {0, 0}, // CSEL + {0, 1}, // CSINC + {1, 0}, // CSINV + {1, 1}, // CSNEG +}; + +// Data-Processing (1 source) +const u32 Data1SrcEnc[][2] = { + {0, 0}, // RBIT + {0, 1}, // REV16 + {0, 2}, // REV32 + {0, 3}, // REV64 + {0, 4}, // CLZ + {0, 5}, // CLS +}; + +// Data-Processing (2 source) +const u32 Data2SrcEnc[] = { + 0x02, // UDIV + 0x03, // SDIV + 0x08, // LSLV + 0x09, // LSRV + 0x0A, // ASRV + 0x0B, // RORV + 0x10, // CRC32B + 0x11, // CRC32H + 0x12, // CRC32W + 0x14, // CRC32CB + 0x15, // CRC32CH + 0x16, // CRC32CW + 0x13, // CRC32X (64bit Only) + 0x17, // XRC32CX (64bit Only) +}; + +// Data-Processing (3 source) +const u32 Data3SrcEnc[][2] = { + {0, 0}, // MADD + {0, 1}, // MSUB + {1, 0}, // SMADDL (64Bit Only) + {1, 1}, // SMSUBL (64Bit Only) + {2, 0}, // SMULH (64Bit Only) + {5, 0}, // UMADDL (64Bit Only) + {5, 1}, // UMSUBL (64Bit Only) + {6, 0}, // UMULH (64Bit Only) +}; + +// Logical (shifted register) +const u32 LogicalEnc[][2] = { + {0, 0}, // AND + {0, 1}, // BIC + {1, 0}, // OOR + {1, 1}, // ORN + {2, 0}, // EOR + {2, 1}, // EON + {3, 0}, // ANDS + {3, 1}, // BICS +}; + +// Load/Store Exclusive +u32 LoadStoreExcEnc[][5] = { + {0, 0, 0, 0, 0}, // STXRB + {0, 0, 0, 0, 1}, // STLXRB + {0, 0, 1, 0, 0}, // LDXRB + {0, 0, 1, 0, 1}, // LDAXRB + {0, 1, 0, 0, 1}, // STLRB + {0, 1, 1, 0, 1}, // LDARB + {1, 0, 0, 0, 0}, // STXRH + {1, 0, 0, 0, 1}, // STLXRH + {1, 0, 1, 0, 0}, // LDXRH + {1, 0, 1, 0, 1}, // LDAXRH + {1, 1, 0, 0, 1}, // STLRH + {1, 1, 1, 0, 1}, // LDARH + {2, 0, 0, 0, 0}, // STXR + {3, 0, 0, 0, 0}, // (64bit) STXR + {2, 0, 0, 0, 1}, // STLXR + {3, 0, 0, 0, 1}, // (64bit) STLXR + {2, 0, 0, 1, 0}, // STXP + {3, 0, 0, 1, 0}, // (64bit) STXP + {2, 0, 0, 1, 1}, // STLXP + {3, 0, 0, 1, 1}, // (64bit) STLXP + {2, 0, 1, 0, 0}, // LDXR + {3, 0, 1, 0, 0}, // (64bit) LDXR + {2, 0, 1, 0, 1}, // LDAXR + {3, 0, 1, 0, 1}, // (64bit) LDAXR + {2, 0, 1, 1, 0}, // LDXP + {3, 0, 1, 1, 0}, // (64bit) LDXP + {2, 0, 1, 1, 1}, // LDAXP + {3, 0, 1, 1, 1}, // (64bit) LDAXP + {2, 1, 0, 0, 1}, // STLR + {3, 1, 0, 0, 1}, // (64bit) STLR + {2, 1, 1, 0, 1}, // LDAR + {3, 1, 1, 0, 1}, // (64bit) LDAR +}; + +void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr) +{ + bool b64Bit = is64Bit(Rt); + s64 distance = (s64)ptr - (s64(code) + 8); + + _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance); + + distance >>= 2; + + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + + Rt = DecodeReg(Rt); + Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \ + (distance << 5) | Rt); +} + +void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr) +{ + bool b64Bit = is64Bit(Rt); + s64 distance = (s64)ptr - (s64(code) + 8); + + _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance); + + distance >>= 2; + + _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + + Rt = DecodeReg(Rt); + Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \ + (bits << 19) | (distance << 5) | Rt); +} + +void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr) +{ + s64 distance = (s64)ptr - s64(code); + + _assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance); + + distance >>= 2; + + _assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + + Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF)); +} + +void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn) +{ + Rn = DecodeReg(Rn); + Write32((0x6B << 25) | (opc << 21) | (op2 << 16) | (op3 << 10) | (Rn << 5) | op4); +} + +void ARM64XEmitter::EncodeExceptionInst(u32 instenc, u32 imm) +{ + _assert_msg_(DYNA_REC, !(imm & ~0xFFFF), "%s: Exception instruction too large immediate: %d", __FUNCTION__, imm); + + Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) | ExcEnc[instenc][2]); +} + +void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt) +{ + Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) | Rt); +} + +void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) +{ + Rd = DecodeReg(Rd); + Rn = DecodeReg(Rn); + Rm = DecodeReg(Rm); + Write32((flags << 29) | (ArithEnc[instenc] << 21) | \ + (Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? 1 << 21 : 0) | (Rm << 16) | Option.GetData() | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (op << 30) | (flags << 29) | \ + (0xD0 << 21) | (Rm << 16) | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) +{ + bool b64Bit = is64Bit(Rn); + + _assert_msg_(DYNA_REC, !(imm & ~0x1F), "%s: too large immediate: %d", __FUNCTION__, imm) + _assert_msg_(DYNA_REC, !(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv) + + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \ + (imm << 16) | (cond << 12) | (1 << 11) | (Rn << 5) | nzcv); +} + +void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond) +{ + bool b64Bit = is64Bit(Rm); + + _assert_msg_(DYNA_REC, !(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv) + + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \ + (Rm << 16) | (cond << 12) | (Rn << 5) | nzcv); +} + +void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | \ + (0xD4 << 21) | (Rm << 16) | (cond << 12) | (CondSelectEnc[instenc][1] << 10) | \ + (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (0x2D6 << 21) | \ + (Data1SrcEnc[instenc][0] << 16) | (Data1SrcEnc[instenc][1] << 10) | \ + (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (0x0D6 << 21) | \ + (Rm << 16) | (Data2SrcEnc[instenc] << 10) | \ + (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Ra = DecodeReg(Ra); + Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | \ + (Rm << 16) | (Data3SrcEnc[instenc][1] << 15) | \ + (Ra << 10) | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((LogicalEnc[instenc][0] << 29) | (0x50 << 21) | (LogicalEnc[instenc][1] << 21) | \ + Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm) +{ + bool b64Bit = is64Bit(Rt); + bool bVec = isVector(Rt); + + _assert_msg_(DYNA_REC, !(imm & 0xFFFFF), "%s: offset too large %d", __FUNCTION__, imm); + + Rt = DecodeReg(Rt); + if (b64Bit && bitop != 0x2) // LDRSW(0x2) uses 64bit reg, doesn't have 64bit bit set + bitop |= 0x1; + Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (imm << 5) | Rt); +} + +void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc, + ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt) +{ + Rs = DecodeReg(Rs); + Rt2 = DecodeReg(Rt2); + Rn = DecodeReg(Rn); + Rt = DecodeReg(Rt); + Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | (LoadStoreExcEnc[instenc][1] << 23) | \ + (LoadStoreExcEnc[instenc][2] << 22) | (LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | \ + (LoadStoreExcEnc[instenc][4] << 15) | (Rt2 << 10) | (Rn << 5) | Rt); +} + +void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm) +{ + bool b64Bit = is64Bit(Rt); + bool b128Bit = is128Bit(Rt); + bool bVec = isVector(Rt); + + if (b128Bit) + imm >>= 4; + else if (b64Bit) + imm >>= 3; + else + imm >>= 2; + + _assert_msg_(DYNA_REC, !(imm & ~0xF), "%s: offset too large %d", __FUNCTION__, imm); + + u32 opc = 0; + if (b128Bit) + opc = 2; + else if (b64Bit && bVec) + opc = 1; + else if (b64Bit && !bVec) + opc = 2; + + Rt = DecodeReg(Rt); + Rt2 = DecodeReg(Rt2); + Rn = DecodeReg(Rn); + Write32((opc << 30) | (bVec << 26) | (op << 22) | (imm << 15) | (Rt2 << 10) | (Rn << 5) | Rt); +} + +void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + bool b64Bit = is64Bit(Rt); + bool bVec = isVector(Rt); + + if (b64Bit) + imm >>= 3; + else + imm >>= 2; + + _assert_msg_(DYNA_REC, !(imm & ~0x1FF), "%s: offset too large %d", __FUNCTION__, imm); + + Rt = DecodeReg(Rt); + Rn = DecodeReg(Rn); + Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 12) | (op2 << 10) | (Rn << 5) | Rt); +} + +void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + bool b64Bit = is64Bit(Rt); + bool bVec = isVector(Rt); + + if (b64Bit) + imm >>= 3; + else + imm >>= 2; + + _assert_msg_(DYNA_REC, !(imm & ~0xFFF), "%s(INDEX_UNSIGNED): offset too large %d", __FUNCTION__, imm); + + Rt = DecodeReg(Rt); + Rn = DecodeReg(Rn); + Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 10) | (Rn << 5) | Rt); +} + +void ARM64XEmitter::EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos) +{ + bool b64Bit = is64Bit(Rd); + + _assert_msg_(DYNA_REC, !(imm & ~0xFFFF), "%s: immediate out of range: %d", __FUNCTION__, imm); + + Rd = DecodeReg(Rd); + Write32((b64Bit << 31) | (op << 29) | (0x25 << 23) | (pos << 21) | (imm << 5) | Rd); +} + +void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | \ + (immr << 16) | (imms << 10) | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + Rt = DecodeReg(Rt); + Rn = DecodeReg(Rn); + Rm = DecodeReg(Rm); + + Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (Rm << 16) | \ + (extend << 13) | (1 << 11) | (Rn << 5) | Rt); +} + +void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd) +{ + bool b64Bit = is64Bit(Rd); + + _assert_msg_(DYNA_REC, !(imm & ~0xFFF), "%s: immediate too large: %x", __FUNCTION__, imm); + + Rd = DecodeReg(Rd); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | \ + (imm << 10) | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + // Sometimes Rd is fixed to SP, but can still be 32bit or 64bit. + // Use Rn to determine bitness here. + bool b64Bit = is64Bit(Rn); + + Rd = DecodeReg(Rd); + Rn = DecodeReg(Rn); + + Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (b64Bit << 22) | \ + (immr << 16) | (imms << 10) | (Rn << 5) | Rd); +} + +// FixupBranch branching +void ARM64XEmitter::SetJumpTarget(FixupBranch const &branch) +{ + bool Not = false; + u32 inst = 0; + s64 distance = (s64)(code - branch.ptr); + distance >>= 2; + + switch (branch.type) + { + case 1: // CBNZ + Not = true; + case 0: // CBZ + { + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance); + bool b64Bit = is64Bit(branch.reg); + ARM64Reg reg = DecodeReg(branch.reg); + inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | (distance << 5) | reg; + } + break; + case 2: // B (conditional) + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance); + inst = (0x2A << 25) | (distance << 5) | branch.cond; + break; + case 4: // TBNZ + Not = true; + case 3: // TBZ + { + _assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance); + ARM64Reg reg = DecodeReg(branch.reg); + inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (distance << 5) | reg; + } + break; + case 5: // B (uncoditional) + _assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance); + inst = (0x5 << 26) | distance; + break; + case 6: // BL (unconditional) + _assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance); + inst = (0x25 << 26) | distance; + break; + } + *(u32*)branch.ptr = inst; +} + +FixupBranch ARM64XEmitter::CBZ(ARM64Reg Rt) +{ + FixupBranch branch; + branch.ptr = code; + branch.type = 0; + branch.reg = Rt; + HINT(HINT_NOP); + return branch; +} +FixupBranch ARM64XEmitter::CBNZ(ARM64Reg Rt) +{ + FixupBranch branch; + branch.ptr = code; + branch.type = 1; + branch.reg = Rt; + HINT(HINT_NOP); + return branch; +} +FixupBranch ARM64XEmitter::B(CCFlags cond) +{ + FixupBranch branch; + branch.ptr = code; + branch.type = 2; + branch.cond = cond; + HINT(HINT_NOP); + return branch; +} +FixupBranch ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bit) +{ + FixupBranch branch; + branch.ptr = code; + branch.type = 3; + branch.reg = Rt; + branch.bit = bit; + HINT(HINT_NOP); + return branch; +} +FixupBranch ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bit) +{ + FixupBranch branch; + branch.ptr = code; + branch.type = 4; + branch.reg = Rt; + branch.bit = bit; + HINT(HINT_NOP); + return branch; +} +FixupBranch ARM64XEmitter::B() +{ + FixupBranch branch; + branch.ptr = code; + branch.type = 5; + HINT(HINT_NOP); + return branch; +} +FixupBranch ARM64XEmitter::BL() +{ + FixupBranch branch; + branch.ptr = code; + branch.type = 6; + HINT(HINT_NOP); + return branch; +} + +// Compare and Branch +void ARM64XEmitter::CBZ(ARM64Reg Rt, const void* ptr) +{ + EncodeCompareBranchInst(0, Rt, ptr); +} +void ARM64XEmitter::CBNZ(ARM64Reg Rt, const void* ptr) +{ + EncodeCompareBranchInst(1, Rt, ptr); +} + +// Conditional Branch +void ARM64XEmitter::B(CCFlags cond, const void* ptr) +{ + s64 distance = (s64)ptr - (s64(code) + 8); + distance >>= 2; + + _assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance); + + Write32((0x54 << 24) | (distance << 5) | cond); +} + +// Test and Branch +void ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bits, const void* ptr) +{ + EncodeTestBranchInst(0, Rt, bits, ptr); +} +void ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bits, const void* ptr) +{ + EncodeTestBranchInst(1, Rt, bits, ptr); +} + +// Unconditional Branch +void ARM64XEmitter::B(const void *ptr) +{ + EncodeUnconditionalBranchInst(0, ptr); +} +void ARM64XEmitter::BL(const void *ptr) +{ + EncodeUnconditionalBranchInst(1, ptr); +} + +// Unconditional Branch (register) +void ARM64XEmitter::BR(ARM64Reg Rn) +{ + EncodeUnconditionalBranchInst(0, 0x1F, 0, 0, Rn); +} +void ARM64XEmitter::BLR(ARM64Reg Rn) +{ + EncodeUnconditionalBranchInst(1, 0x1F, 0, 0, Rn); +} +void ARM64XEmitter::RET(ARM64Reg Rn) +{ + EncodeUnconditionalBranchInst(2, 0x1F, 0, 0, Rn); +} +void ARM64XEmitter::ERET() +{ + EncodeUnconditionalBranchInst(4, 0x1F, 0, 0, SP); +} +void ARM64XEmitter::DRPS() +{ + EncodeUnconditionalBranchInst(5, 0x1F, 0, 0, SP); +} + +// Exception generation +void ARM64XEmitter::SVC(u32 imm) +{ + EncodeExceptionInst(0, imm); +} + +void ARM64XEmitter::HVC(u32 imm) +{ + EncodeExceptionInst(1, imm); +} + +void ARM64XEmitter::SMC(u32 imm) +{ + EncodeExceptionInst(2, imm); +} + +void ARM64XEmitter::BRK(u32 imm) +{ + EncodeExceptionInst(3, imm); +} + +void ARM64XEmitter::HLT(u32 imm) +{ + EncodeExceptionInst(4, imm); +} + +void ARM64XEmitter::DCPS1(u32 imm) +{ + EncodeExceptionInst(5, imm); +} + +void ARM64XEmitter::DCPS2(u32 imm) +{ + EncodeExceptionInst(6, imm); +} + +void ARM64XEmitter::DCPS3(u32 imm) +{ + EncodeExceptionInst(7, imm); +} + +// System +void ARM64XEmitter::_MSR(PStateField field, u8 imm) +{ + u32 op1 = 0, op2 = 0; + switch (field) + { + case FIELD_SPSel: + op1 = 0; op2 = 5; + break; + case FIELD_DAIFSet: + op1 = 3; op2 = 6; + break; + case FIELD_DAIFClr: + op1 = 3; op2 = 7; + break; + } + EncodeSystemInst(0, op1, 3, imm, op2, WSP); +} +void ARM64XEmitter::HINT(SystemHint op) +{ + EncodeSystemInst(0, 3, 2, 0, op, WSP); +} +void ARM64XEmitter::CLREX() +{ + EncodeSystemInst(0, 3, 3, 0, 2, WSP); +} +void ARM64XEmitter::DSB(BarrierType type) +{ + EncodeSystemInst(0, 3, 3, type, 4, WSP); +} +void ARM64XEmitter::DMB(BarrierType type) +{ + EncodeSystemInst(0, 3, 3, type, 5, WSP); +} +void ARM64XEmitter::ISB(BarrierType type) +{ + EncodeSystemInst(0, 3, 3, type, 6, WSP); +} + +// Add/Subtract (extended register) +void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + ADD(Rd, Rn, Rm, ArithOption(Rd)); +} + +void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) +{ + EncodeArithmeticInst(0, false, Rd, Rn, Rm, Option); +} + +void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + ADD(Rd, Rn, Rm, ArithOption(Rd)); +} + +void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) +{ + EncodeArithmeticInst(0, true, Rd, Rn, Rm, Option); +} + +void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + SUB(Rd, Rn, Rm, ArithOption(Rd)); +} + +void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) +{ + EncodeArithmeticInst(1, false, Rd, Rn, Rm, Option); +} + +void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + SUB(Rd, Rn, Rm, ArithOption(Rd)); +} + +void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) +{ + EncodeArithmeticInst(1, true, Rd, Rn, Rm, Option); +} + +void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm) +{ + CMN(Rn, Rm, ArithOption(Rn)); +} + +void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) +{ + EncodeArithmeticInst(0, true, SP, Rn, Rm, Option); +} + +void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm) +{ + CMP(Rn, Rm, ArithOption(Rn)); +} + +void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) +{ + EncodeArithmeticInst(1, true, SP, Rn, Rm, Option); +} + +// Add/Subtract (with carry) +void ARM64XEmitter::ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeArithmeticCarryInst(0, false, Rd, Rn, Rm); +} +void ARM64XEmitter::ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeArithmeticCarryInst(0, true, Rd, Rn, Rm); +} +void ARM64XEmitter::SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeArithmeticCarryInst(1, false, Rd, Rn, Rm); +} +void ARM64XEmitter::SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeArithmeticCarryInst(1, true, Rd, Rn, Rm); +} + +// Conditional Compare (immediate) +void ARM64XEmitter::CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) +{ + EncodeCondCompareImmInst(0, Rn, imm, nzcv, cond); +} +void ARM64XEmitter::CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) +{ + EncodeCondCompareImmInst(1, Rn, imm, nzcv, cond); +} + +// Conditiona Compare (register) +void ARM64XEmitter::CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond) +{ + EncodeCondCompareRegInst(0, Rn, Rm, nzcv, cond); +} +void ARM64XEmitter::CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond) +{ + EncodeCondCompareRegInst(1, Rn, Rm, nzcv, cond); +} + +// Conditional Select +void ARM64XEmitter::CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) +{ + EncodeCondSelectInst(0, Rd, Rn, Rm, cond); +} +void ARM64XEmitter::CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) +{ + EncodeCondSelectInst(1, Rd, Rn, Rm, cond); +} +void ARM64XEmitter::CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) +{ + EncodeCondSelectInst(2, Rd, Rn, Rm, cond); +} +void ARM64XEmitter::CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) +{ + EncodeCondSelectInst(3, Rd, Rn, Rm, cond); +} + +// Data-Processing 1 source +void ARM64XEmitter::RBIT(ARM64Reg Rd, ARM64Reg Rn) +{ + EncodeData1SrcInst(0, Rd, Rn); +} +void ARM64XEmitter::REV16(ARM64Reg Rd, ARM64Reg Rn) +{ + EncodeData1SrcInst(1, Rd, Rn); +} +void ARM64XEmitter::REV32(ARM64Reg Rd, ARM64Reg Rn) +{ + EncodeData1SrcInst(2, Rd, Rn); +} +void ARM64XEmitter::REV64(ARM64Reg Rd, ARM64Reg Rn) +{ + EncodeData1SrcInst(3, Rd, Rn); +} +void ARM64XEmitter::CLZ(ARM64Reg Rd, ARM64Reg Rn) +{ + EncodeData1SrcInst(4, Rd, Rn); +} +void ARM64XEmitter::CLS(ARM64Reg Rd, ARM64Reg Rn) +{ + EncodeData1SrcInst(5, Rd, Rn); +} + +// Data-Processing 2 source +void ARM64XEmitter::UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(0, Rd, Rn, Rm); +} +void ARM64XEmitter::SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(1, Rd, Rn, Rm); +} +void ARM64XEmitter::LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(2, Rd, Rn, Rm); +} +void ARM64XEmitter::LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(3, Rd, Rn, Rm); +} +void ARM64XEmitter::ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(4, Rd, Rn, Rm); +} +void ARM64XEmitter::RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(5, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(6, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(7, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(8, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(9, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(10, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(11, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(12, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(13, Rd, Rn, Rm); +} + +// Data-Processing 3 source +void ARM64XEmitter::MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(0, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(1, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(2, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(3, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(4, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(5, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(6, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(7, Rd, Rn, Rm, Ra); +} + +// Logical (shifted register) +void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(0, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(1, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(2, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(3, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(4, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(5, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(6, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(7, Rd, Rn, Rm, Shift); +} + +// Logical (immediate) +void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeLogicalImmInst(0, Rd, Rn, immr, imms); +} +void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeLogicalImmInst(3, Rd, Rn, immr, imms); +} +void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeLogicalImmInst(2, Rd, Rn, immr, imms); +} +void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeLogicalImmInst(1, Rd, Rn, immr, imms); +} +void ARM64XEmitter::TST(ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeLogicalImmInst(3, SP, Rn, immr, imms); +} + +// Add/subtract (immediate) +void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(0, false, shift, imm, Rn, Rd); +} +void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(0, true, shift, imm, Rn, Rd); +} +void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(1, false, shift, imm, Rn, Rd); +} +void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(1, true, shift, imm, Rn, Rd); +} +void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(1, true, shift, imm, Rn, is64Bit(Rn) ? SP : WSP); +} + +// Data Processing (Immediate) +void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos) +{ + EncodeMOVWideInst(2, Rd, imm, pos); +} +void ARM64XEmitter::MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos) +{ + EncodeMOVWideInst(0, Rd, imm, pos); +} +void ARM64XEmitter::MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos) +{ + EncodeMOVWideInst(3, Rd, imm, pos); +} + +// Bitfield move +void ARM64XEmitter::BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeBitfieldMOVInst(1, Rd, Rn, immr, imms); +} +void ARM64XEmitter::SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeBitfieldMOVInst(0, Rd, Rn, immr, imms); +} +void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeBitfieldMOVInst(2, Rd, Rn, immr, imms); +} + +// Load Register (Literal) +void ARM64XEmitter::LDR(ARM64Reg Rt, u32 imm) +{ + EncodeLoadRegisterInst(0, Rt, imm); +} +void ARM64XEmitter::LDRSW(ARM64Reg Rt, u32 imm) +{ + EncodeLoadRegisterInst(2, Rt, imm); +} +void ARM64XEmitter::PRFM(ARM64Reg Rt, u32 imm) +{ + EncodeLoadRegisterInst(3, Rt, imm); +} + +// Load/Store Exclusive +void ARM64XEmitter::STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(0, Rs, SP, Rt, Rn); +} +void ARM64XEmitter::STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(1, Rs, SP, Rt, Rn); +} +void ARM64XEmitter::LDXRB(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(2, SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDAXRB(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(3, SP, SP, Rt, Rn); +} +void ARM64XEmitter::STLRB(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(4, SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDARB(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(5, SP, SP, Rt, Rn); +} +void ARM64XEmitter::STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(6, Rs, SP, Rt, Rn); +} +void ARM64XEmitter::STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(7, Rs, SP, Rt, Rn); +} +void ARM64XEmitter::LDXRH(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(8, SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDAXRH(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(9, SP, SP, Rt, Rn); +} +void ARM64XEmitter::STLRH(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(10, SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDARH(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(11, SP, SP, Rt, Rn); +} +void ARM64XEmitter::STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(12 + is64Bit(Rt), Rs, SP, Rt, Rn); +} +void ARM64XEmitter::STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(14 + is64Bit(Rt), Rs, SP, Rt, Rn); +} +void ARM64XEmitter::STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(16 + is64Bit(Rt), Rs, Rt2, Rt, Rn); +} +void ARM64XEmitter::STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(18 + is64Bit(Rt), Rs, Rt2, Rt, Rn); +} +void ARM64XEmitter::LDXR(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(20 + is64Bit(Rt), SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDAXR(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(22 + is64Bit(Rt), SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(24 + is64Bit(Rt), SP, Rt2, Rt, Rn); +} +void ARM64XEmitter::LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(26 + is64Bit(Rt), SP, Rt2, Rt, Rn); +} +void ARM64XEmitter::STLR(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(28 + is64Bit(Rt), SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDAR(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(30 + is64Bit(Rt), SP, SP, Rt, Rn); +} + +// Load/Store no-allocate pair (offset) +void ARM64XEmitter::STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm) +{ + EncodeLoadStorePairedInst(0xA0, Rt, Rt2, Rn, imm); +} +void ARM64XEmitter::LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm) +{ + EncodeLoadStorePairedInst(0xA1, Rt, Rt2, Rn, imm); +} + +// Load/Store register (immediate post-indexed) +// XXX: Most of these support vectors +void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x0E0, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x0E1, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x0E2 : 0x0E3, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x1E0, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x1E1, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x1E2 : 0x1E3, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x3E0 : 0x2E0, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x3E1 : 0x2E1, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x2E2, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} + +// Load/Store register (register offset) +void ARM64XEmitter::STRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(0, 0, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(0, 1, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRSB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + bool b64Bit = is64Bit(Rt); + EncodeLoadStoreRegisterOffset(0, 3 - b64Bit, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::STRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(1, 0, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(1, 1, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRSH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + bool b64Bit = is64Bit(Rt); + EncodeLoadStoreRegisterOffset(1, 3 - b64Bit, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::STR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + bool b64Bit = is64Bit(Rt); + EncodeLoadStoreRegisterOffset(2 + b64Bit, 0, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + bool b64Bit = is64Bit(Rt); + EncodeLoadStoreRegisterOffset(2 + b64Bit, 1, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRSW(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(2, 2, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm, extend); +} + +// Wrapper around MOVZ+MOVK +void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) +{ + unsigned parts = is64Bit(Rd) ? 4 : 2; + bool upload_part[4] = {}; + bool need_movz = false; + + if (!is64Bit(Rd)) + _assert_msg_(DYNA_REC, !(imm >> 32), "%s: immediate doesn't fit in 32bit register: %lx", __FUNCTION__, imm); + + // XXX: Optimize more + // XXX: Support rotating immediates to save instructions + if (optimize) + { + for (unsigned i = 0; i < parts; ++i) + { + if ((imm >> (i * 16)) & 0xFFFF) + upload_part[i] = true; + else + need_movz = true; + } + } + + for (unsigned i = 0; i < parts; ++i) + { + if (need_movz && upload_part[i]) + { + MOVZ(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i); + need_movz = false; + } + else + { + if (upload_part[i] || !optimize) + MOVK(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i); + } + } + +} + +} + diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h new file mode 100644 index 0000000000..e8294b9a25 --- /dev/null +++ b/Source/Core/Common/Arm64Emitter.h @@ -0,0 +1,538 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include "Common/ArmCommon.h" +#include "Common/CodeBlock.h" +#include "Common/Common.h" + +namespace Arm64Gen +{ + +// X30 serves a dual purpose as a link register +// Encoded as +// Types: +// 000 - 32bit GPR +// 001 - 64bit GPR +// 010 - VFP single precision +// 100 - VFP double precision +// 110 - VFP quad precision +enum ARM64Reg +{ + // 32bit registers + W0 = 0, W1, W2, W3, W4, W5, W6, + W7, W8, W9, W10, W11, W12, W13, W14, + W15, W16, W17, W18, W19, W20, W21, W22, + W23, W24, W25, W26, W27, W28, W29, W30, + + WSP, // 32bit stack pointer + + // 64bit registers + X0 = 0x20, X1, X2, X3, X4, X5, X6, + X7, X8, X9, X10, X11, X12, X13, X14, + X15, X16, X17, X18, X19, X20, X21, X22, + X23, X24, X25, X26, X27, X28, X29, X30, + + SP, // 64bit stack pointer + + // VFP single precision registers + S0 = 0x40, S1, S2, S3, S4, S5, S6, + S7, S8, S9, S10, S11, S12, S13, + S14, S15, S16, S17, S18, S19, S20, + S21, S22, S23, S24, S25, S26, S27, + S28, S29, S30, S31, + + // VFP Double Precision registers + D0 = 0x80, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31, + + // ASIMD Quad-Word registers + Q0 = 0xC0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, + Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, + Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23, + Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31, + + // For PRFM(prefetch memory) encoding + // This is encoded in the Rt register + // Data preload + PLDL1KEEP = 0, PLDL1STRM, + PLDL2KEEP, PLDL2STRM, + PLDL3KEEP, PLDL3STRM, + // Instruction preload + PLIL1KEEP = 8, PLIL1STRM, + PLIL2KEEP, PLIL2STRM, + PLIL3KEEP, PLIL3STRM, + // Prepare for store + PLTL1KEEP = 16, PLTL1STRM, + PLTL2KEEP, PLTL2STRM, + PLTL3KEEP, PLTL3STRM, + + INVALID_REG = 0xFFFFFFFF +}; + +inline bool is64Bit(ARM64Reg reg) { return reg & 0x20; } +inline bool is128Bit(ARM64Reg reg) { return reg & 0xC0; } +inline bool isVector(ARM64Reg reg) { return (reg & 0xC0) != 0; } +inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); } +inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); } + +enum OpType +{ + TYPE_IMM = 0, + TYPE_REG, + TYPE_IMMSREG, + TYPE_RSR, + TYPE_MEM +}; + +enum ShiftType +{ + ST_LSL = 0, + ST_LSR = 1, + ST_ASR = 2, + ST_ROR = 3, +}; + +enum IndexType +{ + INDEX_UNSIGNED, + INDEX_POST, + INDEX_PRE, +}; + +enum ShiftAmount +{ + SHIFT_0 = 0, + SHIFT_16 = 1, + SHIFT_32 = 2, + SHIFT_48 = 3, +}; + +enum ExtendType +{ + EXTEND_UXTW = 2, + EXTEND_LSL = 3, // Default for zero shift amount + EXTEND_SXTW = 6, + EXTEND_SXTX = 7, +}; + +struct FixupBranch +{ + u8 *ptr; + // Type defines + // 0 = CBZ (32bit) + // 1 = CBNZ (32bit) + // 2 = B (conditional) + // 3 = TBZ + // 4 = TBNZ + // 5 = B (unconditional) + // 6 = BL (unconditional) + u32 type; + + // Used with B.cond + CCFlags cond; + + // Used with TBZ/TBNZ + u8 bit; + + // Used with Test/Compare and Branch + ARM64Reg reg; +}; + +enum PStateField +{ + FIELD_SPSel = 0, + FIELD_DAIFSet, + FIELD_DAIFClr, +}; + +enum SystemHint +{ + HINT_NOP = 0, + HINT_YIELD, + HINT_WFE, + HINT_WFI, + HINT_SEV, + HINT_SEVL, +}; + +enum BarrierType +{ + OSHLD = 1, + OSHST = 2, + OSH = 3, + NSHLD = 5, + NSHST = 6, + NSH = 7, + ISHLD = 9, + ISHST = 10, + ISH = 11, + LD = 13, + ST = 14, + SY = 15, +}; + +class ArithOption +{ + public: + enum WidthSpecifier { + WIDTH_DEFAULT, + WIDTH_32BIT, + WIDTH_64BIT, + }; + enum ExtendSpecifier { + EXTEND_UXTB = 0x0, + EXTEND_UXTH = 0x1, + EXTEND_UXTW = 0x2, /* Also LSL on 32bit width */ + EXTEND_UXTX = 0x3, /* Also LSL on 64bit width */ + EXTEND_SXTB = 0x4, + EXTEND_SXTH = 0x5, + EXTEND_SXTW = 0x6, + EXTEND_SXTX = 0x7, + }; + enum TypeSpecifier { + TYPE_EXTENDEDREG, + TYPE_IMM, + TYPE_SHIFTEDREG, + }; + private: + ARM64Reg _destReg; + WidthSpecifier _width; + ExtendSpecifier _extend; + TypeSpecifier _type; + ShiftType _shifttype; + u32 _shift; + public: + ArithOption(ARM64Reg Rd) + { + _destReg = Rd; + _shift = 0; + _type = TYPE_EXTENDEDREG; + if (is64Bit(Rd)) + { + _width = WIDTH_64BIT; + _extend = EXTEND_UXTX; + } + else + { + _width = WIDTH_32BIT; + _extend = EXTEND_UXTW; + } + } + ArithOption(ARM64Reg Rd, ShiftType ShiftType, u32 Shift) + { + _destReg = Rd; + _shift = Shift; + _shifttype = ShiftType; + _type = TYPE_SHIFTEDREG; + if (is64Bit(Rd)) + _width = WIDTH_64BIT; + else + _width = WIDTH_32BIT; + } + TypeSpecifier GetType() + { + return _type; + } + u32 GetData() + { + switch (_type) + { + case TYPE_EXTENDEDREG: + return (_width == WIDTH_64BIT ? (1 << 31) : 0) | + (_extend << 13) | + (_shift << 10); + break; + case TYPE_SHIFTEDREG: + return (_width == WIDTH_64BIT ? (1 << 31) : 0) | + (_shifttype << 22) | + (_shift << 10); + break; + default: + _dbg_assert_msg_(DYNA_REC, false, "Invalid type in GetData"); + break; + } + return 0; + } +}; + +class ARM64XEmitter +{ +private: + u8 *code, *startcode; + u8 *lastCacheFlushEnd; + + void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr); + void EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr); + void EncodeUnconditionalBranchInst(u32 op, const void* ptr); + void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn); + void EncodeExceptionInst(u32 instenc, u32 imm); + void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt); + void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond); + void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond); + void EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + void EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn); + void EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm); + void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt); + void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm); + void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos); + void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend); + void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd); + void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + +protected: + inline void Write32(u32 value) {*(u32*)code = value; code+=4;} + +public: + ARM64XEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {} + virtual ~ARM64XEmitter() {} + + void SetCodePtr(u8 *ptr); + void ReserveCodeSpace(u32 bytes); + const u8 *AlignCode16(); + const u8 *AlignCodePage(); + const u8 *GetCodePtr() const; + void FlushIcache(); + void FlushIcacheSection(u8 *start, u8 *end); + u8 *GetWritableCodePtr(); + + // FixupBranch branching + void SetJumpTarget(FixupBranch const &branch); + FixupBranch CBZ(ARM64Reg Rt); + FixupBranch CBNZ(ARM64Reg Rt); + FixupBranch B(CCFlags cond); + FixupBranch TBZ(ARM64Reg Rt, u8 bit); + FixupBranch TBNZ(ARM64Reg Rt, u8 bit); + FixupBranch B(); + FixupBranch BL(); + + // Compare and Branch + void CBZ(ARM64Reg Rt, const void* ptr); + void CBNZ(ARM64Reg Rt, const void* ptr); + + // Conditional Branch + void B(CCFlags cond, const void* ptr); + + // Test and Branch + void TBZ(ARM64Reg Rt, u8 bits, const void* ptr); + void TBNZ(ARM64Reg Rt, u8 bits, const void* ptr); + + // Unconditional Branch + void B(const void *ptr); + void BL(const void *ptr); + + // Unconditional Branch (register) + void BR(ARM64Reg Rn); + void BLR(ARM64Reg Rn); + void RET(ARM64Reg Rn); + void ERET(); + void DRPS(); + + // Exception generation + void SVC(u32 imm); + void HVC(u32 imm); + void SMC(u32 imm); + void BRK(u32 imm); + void HLT(u32 imm); + void DCPS1(u32 imm); + void DCPS2(u32 imm); + void DCPS3(u32 imm); + + // System + void _MSR(PStateField field, u8 imm); + void HINT(SystemHint op); + void CLREX(); + void DSB(BarrierType type); + void DMB(BarrierType type); + void ISB(BarrierType type); + + // Add/Subtract (Extended/Shifted register) + void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void CMN(ARM64Reg Rn, ARM64Reg Rm); + void CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void CMP(ARM64Reg Rn, ARM64Reg Rm); + void CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + + // Add/Subtract (with carry) + void ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + + // Conditional Compare (immediate) + void CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond); + void CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond); + + // Conditional Compare (register) + void CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond); + void CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond); + + // Conditional Select + void CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + void CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + + // Data-Processing 1 source + void RBIT(ARM64Reg Rd, ARM64Reg Rn); + void REV16(ARM64Reg Rd, ARM64Reg Rn); + void REV32(ARM64Reg Rd, ARM64Reg Rn); + void REV64(ARM64Reg Rd, ARM64Reg Rn); + void CLZ(ARM64Reg Rd, ARM64Reg Rn); + void CLS(ARM64Reg Rd, ARM64Reg Rn); + + // Data-Processing 2 source + void UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + + // Data-Processing 3 source + void MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + + // Logical (shifted register) + void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + + // Logical (immediate) + void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void TST(ARM64Reg Rn, u32 immr, u32 imms); + + // Add/subtract (immediate) + void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false); + void ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false); + void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false); + void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false); + void CMP(ARM64Reg Rn, u32 imm, bool shift = false); + + // Data Processing (Immediate) + void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0); + void MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0); + void MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0); + + // Bitfield move + void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + + // Load Register (Literal) + void LDR(ARM64Reg Rt, u32 imm); + void LDRSW(ARM64Reg Rt, u32 imm); + void PRFM(ARM64Reg Rt, u32 imm); + + // Load/Store Exclusive + void STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void LDXRB(ARM64Reg Rt, ARM64Reg Rn); + void LDAXRB(ARM64Reg Rt, ARM64Reg Rn); + void STLRB(ARM64Reg Rt, ARM64Reg Rn); + void LDARB(ARM64Reg Rt, ARM64Reg Rn); + void STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void LDXRH(ARM64Reg Rt, ARM64Reg Rn); + void LDAXRH(ARM64Reg Rt, ARM64Reg Rn); + void STLRH(ARM64Reg Rt, ARM64Reg Rn); + void LDARH(ARM64Reg Rt, ARM64Reg Rn); + void STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn); + void STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn); + void LDXR(ARM64Reg Rt, ARM64Reg Rn); + void LDAXR(ARM64Reg Rt, ARM64Reg Rn); + void LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn); + void LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn); + void STLR(ARM64Reg Rt, ARM64Reg Rn); + void LDAR(ARM64Reg Rt, ARM64Reg Rn); + + // Load/Store no-allocate pair (offset) + void STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm); + void LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm); + + // Load/Store register (immediate indexed) + void STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + + // Load/Store register (register offset) + void STRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRSB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void STRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRSH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void STR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void PRFM(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + + // Wrapper around MOVZ+MOVK + void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true); +}; + +class ARM64CodeBlock : public CodeBlock +{ +private: + void PoisonMemory() override + { + u32* ptr = (u32*)region; + u32* maxptr = (u32*)region + region_size; + // If our memory isn't a multiple of u32 then this won't write the last remaining bytes with anything + // Less than optimal, but there would be nothing we could do but throw a runtime warning anyway. + // AArch64: 0xD4200000 = BRK 0 + while (ptr < maxptr) + *ptr++ = 0xD4200000; + } +}; +} + diff --git a/Source/Core/Common/ArmCommon.h b/Source/Core/Common/ArmCommon.h new file mode 100644 index 0000000000..ae919d9232 --- /dev/null +++ b/Source/Core/Common/ArmCommon.h @@ -0,0 +1,26 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +enum CCFlags +{ + CC_EQ = 0, // Equal + CC_NEQ, // Not equal + CC_CS, // Carry Set + CC_CC, // Carry Clear + CC_MI, // Minus (Negative) + CC_PL, // Plus + CC_VS, // Overflow + CC_VC, // No Overflow + CC_HI, // Unsigned higher + CC_LS, // Unsigned lower or same + CC_GE, // Signed greater than or equal + CC_LT, // Signed less than + CC_GT, // Signed greater than + CC_LE, // Signed less than or equal + CC_AL, // Always (unconditional) 14 + CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same + CC_LO = CC_CC, // Alias of CC_CC Unsigned lower +}; +const u32 NO_COND = 0xE0000000; + diff --git a/Source/Core/Common/ArmEmitter.h b/Source/Core/Common/ArmEmitter.h index d33269d090..fbc0f80d67 100644 --- a/Source/Core/Common/ArmEmitter.h +++ b/Source/Core/Common/ArmEmitter.h @@ -8,6 +8,7 @@ #include +#include "Common/ArmCommon.h" #include "Common/CodeBlock.h" #include "Common/Common.h" @@ -61,28 +62,6 @@ enum ARMReg INVALID_REG = 0xFFFFFFFF }; -enum CCFlags -{ - CC_EQ = 0, // Equal - CC_NEQ, // Not equal - CC_CS, // Carry Set - CC_CC, // Carry Clear - CC_MI, // Minus (Negative) - CC_PL, // Plus - CC_VS, // Overflow - CC_VC, // No Overflow - CC_HI, // Unsigned higher - CC_LS, // Unsigned lower or same - CC_GE, // Signed greater than or equal - CC_LT, // Signed less than - CC_GT, // Signed greater than - CC_LE, // Signed less than or equal - CC_AL, // Always (unconditional) 14 - CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same - CC_LO = CC_CC, // Alias of CC_CC Unsigned lower -}; -const u32 NO_COND = 0xE0000000; - enum ShiftType { ST_LSL = 0, diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index aebfc3e7ea..cfdf6cd14c 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -31,19 +31,23 @@ set(SRCS BreakPoints.cpp Logging/LogManager.cpp) -if(_M_ARM_32) #ARMv7 - set(SRCS ${SRCS} - ArmCPUDetect.cpp - ArmEmitter.cpp - GenericFPURoundMode.cpp) -elseif(_M_X86) #X86 - set(SRCS ${SRCS} - x64CPUDetect.cpp - x64FPURoundMode.cpp) -else() #Generic - set(SRCS ${SRCS} - GenericFPURoundMode.cpp - x64CPUDetect.cpp) +if(_M_ARM) + if (_M_ARM_32) #ARMv7 + set(SRCS ${SRCS} + ArmEmitter.cpp) + else() #AArch64 + set(SRCS ${SRCS} + Arm64Emitter.cpp) + endif() + set(SRCS ${SRCS} + ArmCPUDetect.cpp + GenericFPURoundMode.cpp) +else() + if(_M_X86) #X86 + set(SRCS ${SRCS} + x64FPURoundMode.cpp) + endif() + set(SRCS ${SRCS} x64CPUDetect.cpp) endif() if(WIN32) set(SRCS ${SRCS} ExtendedTrace.cpp) diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index b760d58df7..78172d5cb0 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -198,8 +198,7 @@ if(_M_X86) PowerPC/JitCommon/JitBackpatch.cpp PowerPC/JitCommon/JitAsmCommon.cpp PowerPC/JitCommon/Jit_Util.cpp) -endif() -if(_M_ARM_32) +elseif(_M_ARM_32) set(SRCS ${SRCS} ArmMemTools.cpp PowerPC/JitArm32/Jit.cpp @@ -218,6 +217,16 @@ if(_M_ARM_32) PowerPC/JitArm32/JitArm_SystemRegisters.cpp PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp ) +elseif(_M_ARM_64) + set(SRCS ${SRCS} + PowerPC/JitArm64/Jit.cpp + PowerPC/JitArm64/JitAsm.cpp + PowerPC/JitArm64/JitArm64Cache.cpp + PowerPC/JitArm64/JitArm64_RegCache.cpp + PowerPC/JitArm64/JitArm64_Branch.cpp + PowerPC/JitArm64/JitArm64_LoadStore.cpp + PowerPC/JitArm64/JitArm64_SystemRegisters.cpp + PowerPC/JitArm64/JitArm64_Tables.cpp) endif() set(LIBS diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp new file mode 100644 index 0000000000..f8322a5438 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -0,0 +1,287 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" + +#include "Core/PatchEngine.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitArm64_Tables.h" + +using namespace Arm64Gen; + +static int CODE_SIZE = 1024*1024*32; + +void JitArm64::Init() +{ + AllocCodeSpace(CODE_SIZE); + jo.enableBlocklink = true; + gpr.Init(this); + fpr.Init(this); + + blocks.Init(); + asm_routines.Init(); + + code_block.m_stats = &js.st; + code_block.m_gpa = &js.gpa; + code_block.m_fpa = &js.fpa; +} + +void JitArm64::ClearCache() +{ + ClearCodeSpace(); + blocks.Clear(); +} + +void JitArm64::Shutdown() +{ + FreeCodeSpace(); + blocks.Shutdown(); + asm_routines.Shutdown(); +} + +void JitArm64::unknown_instruction(UGeckoInstruction inst) +{ + WARN_LOG(DYNA_REC, "unknown_instruction %08x - Fix me ;)", inst.hex); +} + +void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) +{ + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); + MOVI2R(W0, inst.hex); + MOVI2R(X30, (u64)instr); + BLR(X30); +} + +void JitArm64::HLEFunction(UGeckoInstruction inst) +{ + WARN_LOG(DYNA_REC, "HLEFunction %08x - Fix me ;)", inst.hex); + exit(0); +} + +void JitArm64::DoNothing(UGeckoInstruction inst) +{ + // Yup, just don't do anything. +} + +void JitArm64::Break(UGeckoInstruction inst) +{ + WARN_LOG(DYNA_REC, "Breaking! %08x - Fix me ;)", inst.hex); + exit(0); +} + +void JitArm64::DoDownCount() +{ + ARM64Reg WA = gpr.GetReg(); + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount)); + if (js.downcountAmount < 4096) // We can enlarge this if we used rotations + { + SUBS(WA, WA, js.downcountAmount); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount)); + } + else + { + ARM64Reg WB = gpr.GetReg(); + MOVI2R(WB, js.downcountAmount); + SUBS(WA, WA, WB); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount)); + gpr.Unlock(WB); + } + gpr.Unlock(WA); +} + +// Exits +void JitArm64::WriteExit(u32 destination) +{ + //If nobody has taken care of this yet (this can be removed when all branches are done) + JitBlock *b = js.curBlock; + JitBlock::LinkData linkData; + linkData.exitAddress = destination; + linkData.exitPtrs = GetWritableCodePtr(); + linkData.linkStatus = false; + + DoDownCount(); + + // Link opportunity! + int block; + if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) + { + // It exists! Joy of joy! + B(blocks.GetBlock(block)->checkedEntry); + linkData.linkStatus = true; + } + else + { + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + MOVI2R(WA, destination); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + MOVI2R(XA, (u64)asm_routines.dispatcher); + BR(XA); + gpr.Unlock(WA); + } + + b->linkData.push_back(linkData); +} +void JitArm64::WriteExceptionExit(ARM64Reg dest) +{ + STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc)); + STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc)); + gpr.Unlock(dest); + DoDownCount(); + MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExceptions); + BLR(EncodeRegTo64(dest)); + LDR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc)); + STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc)); + + MOVI2R(EncodeRegTo64(dest), (u64)asm_routines.dispatcher); + BR(EncodeRegTo64(dest)); +} + +void JitArm64::WriteExitDestInR(ARM64Reg Reg) +{ + STR(INDEX_UNSIGNED, Reg, X29, PPCSTATE_OFF(pc)); + gpr.Unlock(Reg); + DoDownCount(); + MOVI2R(EncodeRegTo64(Reg), (u64)asm_routines.dispatcher); + BR(EncodeRegTo64(Reg)); +} + +void STACKALIGN JitArm64::Run() +{ + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); +} + +void JitArm64::SingleStep() +{ + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); +} + +void STACKALIGN JitArm64::Jit(u32 em_address) +{ + if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || Core::g_CoreStartupParameter.bJITNoBlockCache) + { + ClearCache(); + } + + int block_num = blocks.AllocateBlock(PowerPC::ppcState.pc); + JitBlock *b = blocks.GetBlock(block_num); + const u8* BlockPtr = DoJit(PowerPC::ppcState.pc, &code_buffer, b); + blocks.FinalizeBlock(block_num, jo.enableBlocklink, BlockPtr); +} + +const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b) +{ + int blockSize = code_buf->GetSize(); + + if (Core::g_CoreStartupParameter.bEnableDebugging) + { + // Comment out the following to disable breakpoints (speed-up) + blockSize = 1; + } + + if (em_address == 0) + { + Core::SetState(Core::CORE_PAUSE); + WARN_LOG(DYNA_REC, "ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR); + } + + js.isLastInstruction = false; + js.blockStart = em_address; + js.fifoBytesThisBlock = 0; + js.downcountAmount = 0; + js.skipnext = false; + js.curBlock = b; + + u32 nextPC = em_address; + // Analyze the block, collect all instructions it is made of (including inlining, + // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. + nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize); + + PPCAnalyst::CodeOp *ops = code_buf->codebuffer; + + const u8 *start = GetCodePtr(); + b->checkedEntry = start; + b->runCount = 0; + + // Downcount flag check, Only valid for linked blocks + { + FixupBranch bail = B(CC_PL); + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + MOVI2R(WA, js.blockStart); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + MOVI2R(XA, (u64)asm_routines.doTiming); + BR(XA); + gpr.Unlock(WA); + SetJumpTarget(bail); + } + + const u8 *normalEntry = GetCodePtr(); + b->normalEntry = normalEntry; + + gpr.Start(js.gpa); + fpr.Start(js.fpa); + + if (!Core::g_CoreStartupParameter.bEnableDebugging) + js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); + + // Translate instructions + for (u32 i = 0; i < code_block.m_num_instructions; i++) + { + js.compilerPC = ops[i].address; + js.op = &ops[i]; + js.instructionNumber = i; + const GekkoOPInfo *opinfo = ops[i].opinfo; + js.downcountAmount += opinfo->numCycles; + + if (i == (code_block.m_num_instructions - 1)) + { + // WARNING - cmp->branch merging will screw this up. + js.isLastInstruction = true; + js.next_inst = 0; + } + else + { + // help peephole optimizations + js.next_inst = ops[i + 1].inst; + js.next_compilerPC = ops[i + 1].address; + } + if (!ops[i].skip) + { + if (js.memcheck && (opinfo->flags & FL_USE_FPU)) + { + // Don't do this yet + BRK(0x7777); + } + + JitArm64Tables::CompileInstruction(ops[i]); + + if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) + { + // Don't do this yet + BRK(0x666); + } + } + } + + if (code_block.m_memory_exception) + BRK(0x500); + + if (code_block.m_broken) + { + printf("Broken Block going to 0x%08x\n", nextPC); + WriteExit(nextPC); + } + + b->codeSize = (u32)(GetCodePtr() - normalEntry); + b->originalSize = code_block.m_num_instructions; + FlushIcache(); + return start; +} diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h new file mode 100644 index 0000000000..ffa7a060f6 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -0,0 +1,104 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include "Common/Arm64Emitter.h" + +#include "Core/PowerPC/CPUCoreBase.h" +#include "Core/PowerPC/PPCAnalyst.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitArm64Cache.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" +#include "Core/PowerPC/JitCommon/JitBase.h" + +#define PPCSTATE_OFF(elem) ((s64)&PowerPC::ppcState.elem - (s64)&PowerPC::ppcState) + +// Some asserts to make sure we will be able to load everything +static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR"); +static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit VFP) requires FPRs to be 8 byte aligned"); + +using namespace Arm64Gen; +class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock +{ +public: + JitArm64() : code_buffer(32000) {} + ~JitArm64() {} + + void Init(); + void Shutdown(); + + JitBaseBlockCache *GetBlockCache() { return &blocks; } + + const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) { return NULL; } + + bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); } + + void ClearCache(); + + CommonAsmRoutinesBase *GetAsmRoutines() + { + return &asm_routines; + } + + void Run(); + void SingleStep(); + + void Jit(u32 em_address); + + const char *GetName() + { + return "JITARM64"; + } + + // OPCODES + void unknown_instruction(UGeckoInstruction inst); + void FallBackToInterpreter(UGeckoInstruction inst); + void DoNothing(UGeckoInstruction inst); + void HLEFunction(UGeckoInstruction inst); + + void DynaRunTable4(UGeckoInstruction inst); + void DynaRunTable19(UGeckoInstruction inst); + void DynaRunTable31(UGeckoInstruction inst); + void DynaRunTable59(UGeckoInstruction inst); + void DynaRunTable63(UGeckoInstruction inst); + + // Force break + void Break(UGeckoInstruction inst); + + // Branch + void sc(UGeckoInstruction inst); + void rfi(UGeckoInstruction inst); + void bx(UGeckoInstruction inst); + void bcx(UGeckoInstruction inst); + void bcctrx(UGeckoInstruction inst); + void bclrx(UGeckoInstruction inst); + + // System Registers + void mtmsr(UGeckoInstruction inst); + + // LoadStore + void icbi(UGeckoInstruction inst); + +private: + Arm64GPRCache gpr; + Arm64FPRCache fpr; + + JitArm64BlockCache blocks; + JitArm64AsmRoutineManager asm_routines; + + PPCAnalyst::CodeBuffer code_buffer; + + const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b); + + void DoDownCount(); + + // Exits + void WriteExit(u32 destination); + void WriteExceptionExit(ARM64Reg dest); + void WriteExitDestInR(ARM64Reg dest); + + FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); +}; + diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp new file mode 100644 index 0000000000..55c319a7cc --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp @@ -0,0 +1,16 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Core/PowerPC/JitInterface.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64Cache.h" + +void JitArm64BlockCache::WriteLinkBlock(u8* location, const u8* address) +{ +} + +void JitArm64BlockCache::WriteDestroyBlock(const u8* location, u32 address) +{ +} + diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h new file mode 100644 index 0000000000..3868751597 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h @@ -0,0 +1,17 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include "Core/PowerPC/JitCommon/JitCache.h" + + +typedef void (*CompiledCode)(); + +class JitArm64BlockCache : public JitBaseBlockCache +{ +private: + void WriteLinkBlock(u8* location, const u8* address); + void WriteDestroyBlock(const u8* location, u32 address); +}; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp new file mode 100644 index 0000000000..037e995718 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp @@ -0,0 +1,254 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" + +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/PowerPC/PowerPC.h" +#include "Core/PowerPC/PPCTables.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" + +using namespace Arm64Gen; + +void JitArm64::sc(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + ARM64Reg WA = gpr.GetReg(); + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + ORR(WA, WA, 31, 0); // Same as WA | EXCEPTION_SYSCALL + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + + MOVI2R(WA, js.compilerPC + 4); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + + // WA is unlocked in this function + WriteExceptionExit(WA); +} + +void JitArm64::rfi(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + // See Interpreter rfi for details + const u32 mask = 0x87C0FFFF; + const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] + // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; + // R0 = MSR location + // R1 = MSR contents + // R2 = Mask + // R3 = Mask + ARM64Reg WA = gpr.GetReg(); + ARM64Reg WB = gpr.GetReg(); + ARM64Reg WC = gpr.GetReg(); + + MOVI2R(WA, (~mask) & clearMSR13); + MOVI2R(WB, mask & clearMSR13); + + LDR(INDEX_UNSIGNED, WC, X29, PPCSTATE_OFF(msr)); + + AND(WC, WC, WB, ArithOption(WC, ST_LSL, 0)); // rD = Masked MSR + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here + + AND(WA, WA, WB, ArithOption(WA, ST_LSL, 0)); // rB contains masked SRR1 here + ORR(WA, WA, WC, ArithOption(WA, ST_LSL, 0)); // rB = Masked MSR OR masked SRR1 + + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr)); // STR rB in to rA + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_SRR0])); + gpr.Unlock(WB, WC); + + // WA is unlocked in this function + WriteExceptionExit(WA); +} + +void JitArm64::bx(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + u32 destination; + if (inst.AA) + destination = SignExt26(inst.LI << 2); + else + destination = js.compilerPC + SignExt26(inst.LI << 2); + + if (inst.LK) + { + u32 Jumpto = js.compilerPC + 4; + ARM64Reg WA = gpr.GetReg(); + MOVI2R(WA, Jumpto); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); + gpr.Unlock(WA); + } + + if (destination == js.compilerPC) + { + // make idle loops go faster + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + + MOVI2R(XA, (u64)&CoreTiming::Idle); + BLR(XA); + MOVI2R(WA, js.compilerPC); + WriteExceptionExit(WA); + } + + WriteExit(destination); +} + +void JitArm64::bcx(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + ARM64Reg WA = gpr.GetReg(); + FixupBranch pCTRDontBranch; + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR + { + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + SUBS(WA, WA, 1); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + + if (inst.BO & BO_BRANCH_IF_CTR_0) + pCTRDontBranch = B(CC_NEQ); + else + pCTRDontBranch = B(CC_EQ); + } + + FixupBranch pConditionDontBranch; + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit + { + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); + } + + if (inst.LK) + { + u32 Jumpto = js.compilerPC + 4; + MOVI2R(WA, Jumpto); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); + } + gpr.Unlock(WA); + + u32 destination; + if (inst.AA) + destination = SignExt16(inst.BD << 2); + else + destination = js.compilerPC + SignExt16(inst.BD << 2); + WriteExit(destination); + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) + SetJumpTarget( pConditionDontBranch ); + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) + SetJumpTarget( pCTRDontBranch ); + + WriteExit(js.compilerPC + 4); +} + +void JitArm64::bcctrx(UGeckoInstruction inst) +{ + INSTRUCTION_START + + // bcctrx doesn't decrement and/or test CTR + _assert_msg_(DYNA_REC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!"); + + if (inst.BO_2 & BO_DONT_CHECK_CONDITION) + { + // BO_2 == 1z1zz -> b always + + //NPC = CTR & 0xfffffffc; + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + if (inst.LK_3) + { + ARM64Reg WB = gpr.GetReg(); + u32 Jumpto = js.compilerPC + 4; + MOVI2R(WB, Jumpto); + STR(INDEX_UNSIGNED, WB, X29, PPCSTATE_OFF(spr[SPR_LR])); + gpr.Unlock(WB); + } + + ARM64Reg WA = gpr.GetReg(); + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. + WriteExitDestInR(WA); + } + else + { + // Rare condition seen in (just some versions of?) Nintendo's NES Emulator + // BO_2 == 001zy -> b if false + // BO_2 == 011zy -> b if true + _assert_msg_(DYNA_REC, false, "Haven't implemented rare form of bcctrx yet"); + } +} + +void JitArm64::bclrx(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + ARM64Reg WA = gpr.GetReg(); + FixupBranch pCTRDontBranch; + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR + { + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + SUBS(WA, WA, 1); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + + if (inst.BO & BO_BRANCH_IF_CTR_0) + pCTRDontBranch = B(CC_NEQ); + else + pCTRDontBranch = B(CC_EQ); + } + + FixupBranch pConditionDontBranch; + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit + { + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); + } + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); + AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. + + if (inst.LK) + { + ARM64Reg WB = gpr.GetReg(); + u32 Jumpto = js.compilerPC + 4; + MOVI2R(WB, Jumpto); + STR(INDEX_UNSIGNED, WB, X29, PPCSTATE_OFF(spr[SPR_LR])); + gpr.Unlock(WB); + } + + WriteExitDestInR(WA); + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) + SetJumpTarget( pConditionDontBranch ); + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) + SetJumpTarget( pCTRDontBranch ); + + WriteExit(js.compilerPC + 4); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp new file mode 100644 index 0000000000..783a9ce78b --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -0,0 +1,22 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" + +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/PowerPC/PowerPC.h" +#include "Core/PowerPC/PPCTables.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" + +using namespace Arm64Gen; + +void JitArm64::icbi(UGeckoInstruction inst) +{ + FallBackToInterpreter(inst); + WriteExit(js.compilerPC + 4); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp new file mode 100644 index 0000000000..a165828037 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -0,0 +1,319 @@ +// copyright 2014 dolphin emulator project +// licensed under gplv2 +// refer to the license.txt file included. + +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" + +using namespace Arm64Gen; + +void Arm64RegCache::Init(ARM64XEmitter *emitter) +{ + m_emit = emitter; + GetAllocationOrder(); +} + +ARM64Reg Arm64RegCache::GetReg(void) +{ + for (auto& it : m_host_registers) + { + if (!it.IsLocked()) + { + it.Lock(); + return it.GetReg(); + } + } + // Holy cow, how did you run out of registers? + // We can't return anything reasonable in this case. Return INVALID_REG and watch the failure happen + _assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb"); + return INVALID_REG; +} + +void Arm64RegCache::LockRegister(ARM64Reg host_reg) +{ + auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg); + if (reg == m_host_registers.end()) + _assert_msg_(DYNA_REC, false, "Don't try locking a register that isn't in the cache"); + _assert_msg_(DYNA_REC, !reg->IsLocked(), "This register is already locked"); + reg->Lock(); +} + +void Arm64RegCache::UnlockRegister(ARM64Reg host_reg) +{ + auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg); + if (reg == m_host_registers.end()) + _assert_msg_(DYNA_REC, false, "Don't try unlocking a register that isn't in the cache"); + _assert_msg_(DYNA_REC, reg->IsLocked(), "This register is already unlocked"); + reg->Unlock(); +} + +// GPR Cache +void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats &stats) +{ + // To make this technique easy, let's just work on pairs of even/odd registers + // We could do simple odd/even as well to get a few spare temporary registers + // but it isn't really needed, we aren't starved for registers + for (int reg = 0; reg < 32; reg += 2) + { + u32 regs_used = (stats.IsUsed(reg) << 1) | stats.IsUsed(reg + 1); + switch (regs_used) + { + case 0x02: // Reg+0 used + { + ARM64Reg host_reg = GetReg(); + m_guest_registers[reg].LoadToReg(host_reg); + m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[reg])); + } + break; + case 0x01: // Reg+1 used + { + ARM64Reg host_reg = GetReg(); + m_guest_registers[reg + 1].LoadToReg(host_reg); + m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[reg + 1])); + } + break; + case 0x03: // Both registers used + { + // Get a 64bit host register + ARM64Reg host_reg = EncodeRegTo64(GetReg()); + m_guest_registers[reg].LoadToAway(host_reg, REG_LOW); + m_guest_registers[reg + 1].LoadToAway(host_reg, REG_HIGH); + + // host_reg is 64bit here. + // It'll load both guest_registers in one LDR + m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[reg])); + } + break; + case 0x00: // Neither used + default: + break; + } + } +} + +bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg) +{ + static std::vector callee_regs = + { + X28, X27, X26, X25, X24, X23, X22, X21, X20, + X19, INVALID_REG, + }; + return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end(); +} + +void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) +{ + for (int i = 0; i < 32; ++i) + { + bool flush = true; + if (mode == FLUSH_INTERPRETER) + { + if (!(op->regsOut[0] == i || + op->regsOut[1] == i || + op->regsIn[0] == i || + op->regsIn[1] == i || + op->regsIn[2] == i)) + { + // This interpreted instruction doesn't use this register + flush = false; + } + } + + if (m_guest_registers[i].GetType() == REG_REG) + { + // Has to be flushed if it isn't in a callee saved register + ARM64Reg host_reg = m_guest_registers[i].GetReg(); + if (flush || !IsCalleeSaved(host_reg)) + { + m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i])); + Unlock(host_reg); + + m_guest_registers[i].Flush(); + } + } + else if (m_guest_registers[i].GetType() == REG_IMM) + { + if (flush) + { + ARM64Reg host_reg = GetReg(); + + m_emit->MOVI2R(host_reg, m_guest_registers[i].GetImm()); + m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i])); + + Unlock(host_reg); + + m_guest_registers[i].Flush(); + } + } + else if (m_guest_registers[i].GetType() == REG_AWAY) + { + // We are away, that means that this register and the next are stored in a single 64bit register + // There is a very good chance that both the registers are out in some "temp" register + bool flush_2 = true; + if (mode == FLUSH_INTERPRETER) + { + if (!(op->regsOut[0] == (i + 1) || + op->regsOut[1] == (i + 1) || + op->regsIn[0] == (i + 1) || + op->regsIn[1] == (i + 1) || + op->regsIn[2] == (i + 1))) + { + // This interpreted instruction doesn't use this register + flush_2 = false; + } + } + + ARM64Reg host_reg = m_guest_registers[i].GetAwayReg(); + ARM64Reg host_reg_1 = m_guest_registers[i].GetReg(); + ARM64Reg host_reg_2 = m_guest_registers[i + 1].GetReg(); + // Flush if either of these shared registers are used. + if (flush || + flush_2 || + !IsCalleeSaved(host_reg) || + !IsCalleeSaved(host_reg_1) || + !IsCalleeSaved(host_reg_2)) + { + + if (host_reg_1 == INVALID_REG) + { + // We never loaded this register + // We've got to test the state of our shared register + // Currently it is always reg+1 + if (host_reg_2 == INVALID_REG) + { + // We didn't load either of these registers + // This can happen in cases where we had to flush register state + // or if we hit an interpreted instruction before we could use it + // Dump the whole thing in one go and flush both registers + + // 64bit host register will store 2 32bit store registers in one go + m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i])); + } + else + { + // Alright, bottom register isn't used, but top one is + // Only store the top one + m_emit->STR(INDEX_UNSIGNED, host_reg_2, X29, PPCSTATE_OFF(gpr[i + 1])); + Unlock(host_reg_2); + } + } + else + { + m_emit->STR(INDEX_UNSIGNED, host_reg_1, X29, PPCSTATE_OFF(gpr[i])); + Unlock(host_reg_1); + } + // Flush both registers + m_guest_registers[i].Flush(); + m_guest_registers[i + 1].Flush(); + Unlock(DecodeReg(host_reg)); + } + // Skip the next register since we've handled it here + ++i; + } + } +} + +ARM64Reg Arm64GPRCache::R(u32 preg) +{ + OpArg& reg = m_guest_registers[preg]; + switch (reg.GetType()) + { + case REG_REG: // already in a reg + return reg.GetReg(); + break; + case REG_IMM: // Is an immediate + { + ARM64Reg host_reg = GetReg(); + m_emit->MOVI2R(host_reg, reg.GetImm()); + } + break; + case REG_AWAY: // Register is away in a shared register + { + // Let's do the voodoo that we dodo + if (reg.GetReg() == INVALID_REG) + { + // Alright, we need to move to a valid location + ARM64Reg host_reg = GetReg(); + reg.LoadAwayToReg(host_reg); + + // Alright, we need to extract from our away register + // To our new 32bit register + if (reg.GetAwayLocation() == REG_LOW) + { + // We are in the low bits + // Just move it over to the low bits of the new register + m_emit->UBFM(EncodeRegTo64(host_reg), reg.GetAwayReg(), 0, 31); + } + else + { + // We are in the high bits + m_emit->UBFM(EncodeRegTo64(host_reg), reg.GetAwayReg(), 32, 63); + } + } + else + { + // We've already moved to a valid place to work on + return reg.GetReg(); + } + } + break; + case REG_NOTLOADED: // Register isn't loaded at /all/ + { + // This is kind of annoying, we shouldn't have gotten here + // This can happen with instructions that use multiple registers(eg lmw) + // The PPCAnalyst needs to be modified to handle these cases + _dbg_assert_msg_(DYNA_REC, false, "Hit REG_NOTLOADED type oparg. Fix the PPCAnalyst"); + ARM64Reg host_reg = GetReg(); + reg.LoadToReg(host_reg); + m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); + return host_reg; + } + break; + default: + _dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!"); + break; + } + // We've got an issue if we end up here + return INVALID_REG; +} + +void Arm64GPRCache::GetAllocationOrder(void) +{ + // Callee saved registers first in hopes that we will keep everything stored there first + const std::vector allocation_order = + { + W28, W27, W26, W25, W24, W23, W22, W21, W20, + W19, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, + W10, W11, W12, W13, W14, W15, W16, W17, W18, + W30, + }; + + for (ARM64Reg reg : allocation_order) + m_host_registers.push_back(HostReg(reg)); +} + +// FPR Cache +void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) +{ + // XXX: Flush our stuff +} + +ARM64Reg Arm64FPRCache::R(u32 preg) +{ + // XXX: return a host reg holding a guest register +} + +void Arm64FPRCache::GetAllocationOrder(void) +{ + const std::vector allocation_order = + { + D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, + D11, D12, D13, D14, D15, D16, D17, D18, D19, + D20, D21, D22, D23, D24, D25, D26, D27, D28, + D29, D30, D31, + }; + + for (ARM64Reg reg : allocation_order) + m_host_registers.push_back(HostReg(reg)); +} + diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h new file mode 100644 index 0000000000..58c21c2ae1 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -0,0 +1,251 @@ +// copyright 2014 dolphin emulator project +// licensed under gplv2 +// refer to the license.txt file included. + +#pragma once + +#include + +#include "Common/Arm64Emitter.h" +#include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/PPCAnalyst.h" + +// Dedicated host registers +// X29 = ppcState pointer +using namespace Arm64Gen; + +enum RegType +{ + REG_NOTLOADED = 0, + REG_REG, // Reg type is register + REG_IMM, // Reg is really a IMM + REG_AWAY, // Reg is away +}; +enum RegLocation +{ + REG_LOW = 0, + REG_HIGH, +}; + +enum FlushMode +{ + // Flushes all registers, no exceptions + FLUSH_ALL = 0, + // Flushes registers in a conditional branch + // Doesn't wipe the state of the registers from the cache + FLUSH_MAINTAIN_STATE, + // Flushes only the required registers for an interpreter call + FLUSH_INTERPRETER, +}; + +class OpArg +{ +public: + OpArg() + { + m_type = REG_NOTLOADED; + m_reg = INVALID_REG; + m_value = 0; + } + + RegType GetType() + { + return m_type; + } + + ARM64Reg GetReg() + { + return m_reg; + } + ARM64Reg GetAwayReg() + { + return m_away_reg; + } + RegLocation GetAwayLocation() + { + return m_away_location; + } + u32 GetImm() + { + return m_value; + } + void LoadToReg(ARM64Reg reg) + { + m_type = REG_REG; + m_reg = reg; + } + void LoadToAway(ARM64Reg reg, RegLocation location) + { + m_type = REG_AWAY; + m_reg = INVALID_REG; + m_away_reg = reg; + m_away_location = location; + } + void LoadAwayToReg(ARM64Reg reg) + { + // We are still an away type + // We just are also in another register + m_reg = reg; + } + void LoadToImm(u32 imm) + { + m_type = REG_IMM; + m_value = imm; + } + void Flush() + { + m_type = REG_NOTLOADED; + } + +private: + // For REG_REG + RegType m_type; // store type + ARM64Reg m_reg; // host register we are in + + // For REG_AWAY + // Host register that we are away in + // This is a 64bit register + ARM64Reg m_away_reg; + RegLocation m_away_location; + + // For REG_IMM + u32 m_value; // IMM value +}; + +class HostReg +{ +public: + HostReg() : m_reg(INVALID_REG), m_locked(false) {} + HostReg(ARM64Reg reg) : m_reg(reg), m_locked(false) {} + bool IsLocked(void) { return m_locked; } + void Lock(void) { m_locked = true; } + void Unlock(void) { m_locked = false; } + ARM64Reg GetReg(void) { return m_reg; } + + bool operator==(const ARM64Reg& reg) + { + return reg == m_reg; + } + +private: + ARM64Reg m_reg; + bool m_locked; +}; + +class Arm64RegCache +{ +public: + Arm64RegCache(void) : m_emit(nullptr), m_reg_stats(nullptr) {}; + virtual ~Arm64RegCache() {}; + + void Init(ARM64XEmitter *emitter); + + virtual void Start(PPCAnalyst::BlockRegStats &stats) {} + + // Flushes the register cache in different ways depending on the mode + virtual void Flush(FlushMode mode, PPCAnalyst::CodeOp* op) = 0; + + // Returns a guest register inside of a host register + // Will dump an immediate to the host register as well + virtual ARM64Reg R(u32 reg) = 0; + + // Returns a temporary register for use + // Requires unlocking after done + ARM64Reg GetReg(void); + + // Locks a register so a cache cannot use it + // Useful for function calls + template + void Lock(Args... args) + { + for (T reg : {args...}) + { + LockRegister(reg); + } + } + + // Unlocks a locked register + // Unlocks registers locked with both GetReg and LockRegister + template + void Unlock(Args... args) + { + for (T reg : {args...}) + { + UnlockRegister(reg); + } + } + +protected: + // Get the order of the host registers + virtual void GetAllocationOrder(void) = 0; + + // Lock a register + void LockRegister(ARM64Reg host_reg); + + // Unlock a register + void UnlockRegister(ARM64Reg host_reg); + + // Code emitter + ARM64XEmitter *m_emit; + + // Host side registers that hold the host registers in order of use + std::vector m_host_registers; + + // Register stats for the current block + PPCAnalyst::BlockRegStats *m_reg_stats; +}; + +class Arm64GPRCache : public Arm64RegCache +{ +public: + ~Arm64GPRCache() {} + + void Start(PPCAnalyst::BlockRegStats &stats); + + // Flushes the register cache in different ways depending on the mode + void Flush(FlushMode mode, PPCAnalyst::CodeOp* op = nullptr); + + // Returns a guest register inside of a host register + // Will dump an immediate to the host register as well + ARM64Reg R(u32 preg); + + // Set a register to an immediate + void SetImmediate(u32 reg, u32 imm) { m_guest_registers[reg].LoadToImm(imm); } + + // Returns if a register is set as an immediate + bool IsImm(u32 reg) { return m_guest_registers[reg].GetType() == REG_IMM; } + + // Gets the immediate that a register is set to + u32 GetImm(u32 reg) { return m_guest_registers[reg].GetImm(); } + +protected: + // Get the order of the host registers + void GetAllocationOrder(void); + + // Our guest GPRs + // PowerPC has 32 GPRs + OpArg m_guest_registers[32]; + +private: + bool IsCalleeSaved(ARM64Reg reg); +}; + +class Arm64FPRCache : public Arm64RegCache +{ +public: + ~Arm64FPRCache() {} + // Flushes the register cache in different ways depending on the mode + void Flush(FlushMode mode, PPCAnalyst::CodeOp* op = nullptr); + + // Returns a guest register inside of a host register + // Will dump an immediate to the host register as well + ARM64Reg R(u32 preg); + +protected: + // Get the order of the host registers + void GetAllocationOrder(void); + + // Our guest FPRs + // Gekko has 32 paired registers(32x2) + OpArg m_guest_registers[32][2]; +}; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp new file mode 100644 index 0000000000..7c72a0ccac --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -0,0 +1,60 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" + +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/PowerPC/PowerPC.h" +#include "Core/PowerPC/PPCTables.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" + +FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) +{ + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + + FixupBranch branch; + switch (bit) + { + case CR_SO_BIT: // check bit 61 set + LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field])); + branch = jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61); + break; + case CR_EQ_BIT: // check bits 31-0 == 0 + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(cr_val[field])); + branch = jump_if_set ? CBZ(WA) : CBNZ(WA); + break; + case CR_GT_BIT: // check val > 0 + LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field])); + CMP(XA, SP); + branch = B(jump_if_set ? CC_GT : CC_LE); + break; + case CR_LT_BIT: // check bit 62 set + LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field])); + branch = jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62); + break; + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } + + gpr.Unlock(WA); + return branch; +} + +void JitArm64::mtmsr(UGeckoInstruction inst) +{ + INSTRUCTION_START + // Don't interpret this, if we do we get thrown out + //JITDISABLE(bJITSystemRegistersOff) + + STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(msr)); + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + WriteExit(js.compilerPC + 4); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp new file mode 100644 index 0000000000..d798ed5056 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -0,0 +1,493 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Core/PowerPC/JitInterface.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_Tables.h" + +// Should be moved in to the Jit class +typedef void (JitArm64::*_Instruction) (UGeckoInstruction instCode); + +static _Instruction dynaOpTable[64]; +static _Instruction dynaOpTable4[1024]; +static _Instruction dynaOpTable19[1024]; +static _Instruction dynaOpTable31[1024]; +static _Instruction dynaOpTable59[32]; +static _Instruction dynaOpTable63[1024]; + +void JitArm64::DynaRunTable4(UGeckoInstruction inst) {(this->*dynaOpTable4 [inst.SUBOP10])(inst);} +void JitArm64::DynaRunTable19(UGeckoInstruction inst) {(this->*dynaOpTable19[inst.SUBOP10])(inst);} +void JitArm64::DynaRunTable31(UGeckoInstruction inst) {(this->*dynaOpTable31[inst.SUBOP10])(inst);} +void JitArm64::DynaRunTable59(UGeckoInstruction inst) {(this->*dynaOpTable59[inst.SUBOP5 ])(inst);} +void JitArm64::DynaRunTable63(UGeckoInstruction inst) {(this->*dynaOpTable63[inst.SUBOP10])(inst);} + +struct GekkoOPTemplate +{ + int opcode; + _Instruction Inst; + //GekkoOPInfo opinfo; // Doesn't need opinfo, Interpreter fills it out +}; + +static GekkoOPTemplate primarytable[] = +{ + {4, &JitArm64::DynaRunTable4}, //"RunTable4", OPTYPE_SUBTABLE | (4<<24), 0}}, + {19, &JitArm64::DynaRunTable19}, //"RunTable19", OPTYPE_SUBTABLE | (19<<24), 0}}, + {31, &JitArm64::DynaRunTable31}, //"RunTable31", OPTYPE_SUBTABLE | (31<<24), 0}}, + {59, &JitArm64::DynaRunTable59}, //"RunTable59", OPTYPE_SUBTABLE | (59<<24), 0}}, + {63, &JitArm64::DynaRunTable63}, //"RunTable63", OPTYPE_SUBTABLE | (63<<24), 0}}, + + {16, &JitArm64::bcx}, //"bcx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {18, &JitArm64::bx}, //"bx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + + {1, &JitArm64::HLEFunction}, //"HLEFunction", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {2, &JitArm64::FallBackToInterpreter}, //"DynaBlock", OPTYPE_SYSTEM, 0}}, + {3, &JitArm64::Break}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {17, &JitArm64::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, + + {7, &JitArm64::FallBackToInterpreter}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, + {8, &JitArm64::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, + {10, &JitArm64::FallBackToInterpreter}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, + {11, &JitArm64::FallBackToInterpreter}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, + {12, &JitArm64::FallBackToInterpreter}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, + {13, &JitArm64::FallBackToInterpreter}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, + {14, &JitArm64::FallBackToInterpreter}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, + {15, &JitArm64::FallBackToInterpreter}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, + + {20, &JitArm64::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}}, + {21, &JitArm64::FallBackToInterpreter}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {23, &JitArm64::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}}, + + {24, &JitArm64::FallBackToInterpreter}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {25, &JitArm64::FallBackToInterpreter}, //"oris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {26, &JitArm64::FallBackToInterpreter}, //"xori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {27, &JitArm64::FallBackToInterpreter}, //"xoris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {28, &JitArm64::FallBackToInterpreter}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, + {29, &JitArm64::FallBackToInterpreter}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, + + {32, &JitArm64::FallBackToInterpreter}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {33, &JitArm64::FallBackToInterpreter}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {34, &JitArm64::FallBackToInterpreter}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {35, &JitArm64::FallBackToInterpreter}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {40, &JitArm64::FallBackToInterpreter}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {41, &JitArm64::FallBackToInterpreter}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {42, &JitArm64::FallBackToInterpreter}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {43, &JitArm64::FallBackToInterpreter}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + + {44, &JitArm64::FallBackToInterpreter}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {45, &JitArm64::FallBackToInterpreter}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {36, &JitArm64::FallBackToInterpreter}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {37, &JitArm64::FallBackToInterpreter}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {38, &JitArm64::FallBackToInterpreter}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {39, &JitArm64::FallBackToInterpreter}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + + {46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, + {47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, + + {48, &JitArm64::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}}, + {49, &JitArm64::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, + {50, &JitArm64::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, + {51, &JitArm64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, + + {52, &JitArm64::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, + {53, &JitArm64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, + {54, &JitArm64::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, + {55, &JitArm64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, + + {56, &JitArm64::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}}, + {57, &JitArm64::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, + {60, &JitArm64::FallBackToInterpreter}, //"psq_st", OPTYPE_PS, FL_IN_A}}, + {61, &JitArm64::FallBackToInterpreter}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, + + //missing: 0, 5, 6, 9, 22, 30, 62, 58 + {0, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {5, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {6, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {9, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {22, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {30, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {62, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {58, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, +}; + +static GekkoOPTemplate table4[] = +{ //SUBOP10 + {0, &JitArm64::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, + {32, &JitArm64::FallBackToInterpreter}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, + {40, &JitArm64::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, + {136, &JitArm64::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, + {264, &JitArm64::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, + {64, &JitArm64::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, + {72, &JitArm64::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, + {96, &JitArm64::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, + {528, &JitArm64::FallBackToInterpreter}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, + {560, &JitArm64::FallBackToInterpreter}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, + {592, &JitArm64::FallBackToInterpreter}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, + {624, &JitArm64::FallBackToInterpreter}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, + + {1014, &JitArm64::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}}, +}; + +static GekkoOPTemplate table4_2[] = +{ + {10, &JitArm64::FallBackToInterpreter}, //"ps_sum0", OPTYPE_PS, 0}}, + {11, &JitArm64::FallBackToInterpreter}, //"ps_sum1", OPTYPE_PS, 0}}, + {12, &JitArm64::FallBackToInterpreter}, //"ps_muls0", OPTYPE_PS, 0}}, + {13, &JitArm64::FallBackToInterpreter}, //"ps_muls1", OPTYPE_PS, 0}}, + {14, &JitArm64::FallBackToInterpreter}, //"ps_madds0", OPTYPE_PS, 0}}, + {15, &JitArm64::FallBackToInterpreter}, //"ps_madds1", OPTYPE_PS, 0}}, + {18, &JitArm64::FallBackToInterpreter}, //"ps_div", OPTYPE_PS, 0, 16}}, + {20, &JitArm64::FallBackToInterpreter}, //"ps_sub", OPTYPE_PS, 0}}, + {21, &JitArm64::FallBackToInterpreter}, //"ps_add", OPTYPE_PS, 0}}, + {23, &JitArm64::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}}, + {24, &JitArm64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}}, + {25, &JitArm64::FallBackToInterpreter}, //"ps_mul", OPTYPE_PS, 0}}, + {26, &JitArm64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, + {28, &JitArm64::FallBackToInterpreter}, //"ps_msub", OPTYPE_PS, 0}}, + {29, &JitArm64::FallBackToInterpreter}, //"ps_madd", OPTYPE_PS, 0}}, + {30, &JitArm64::FallBackToInterpreter}, //"ps_nmsub", OPTYPE_PS, 0}}, + {31, &JitArm64::FallBackToInterpreter}, //"ps_nmadd", OPTYPE_PS, 0}}, +}; + + +static GekkoOPTemplate table4_3[] = +{ + {6, &JitArm64::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}}, + {7, &JitArm64::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}}, + {38, &JitArm64::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}}, + {39, &JitArm64::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}}, +}; + +static GekkoOPTemplate table19[] = +{ + {528, &JitArm64::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, + {16, &JitArm64::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, + {257, &JitArm64::FallBackToInterpreter}, //"crand", OPTYPE_CR, FL_EVIL}}, + {129, &JitArm64::FallBackToInterpreter}, //"crandc", OPTYPE_CR, FL_EVIL}}, + {289, &JitArm64::FallBackToInterpreter}, //"creqv", OPTYPE_CR, FL_EVIL}}, + {225, &JitArm64::FallBackToInterpreter}, //"crnand", OPTYPE_CR, FL_EVIL}}, + {33, &JitArm64::FallBackToInterpreter}, //"crnor", OPTYPE_CR, FL_EVIL}}, + {449, &JitArm64::FallBackToInterpreter}, //"cror", OPTYPE_CR, FL_EVIL}}, + {417, &JitArm64::FallBackToInterpreter}, //"crorc", OPTYPE_CR, FL_EVIL}}, + {193, &JitArm64::FallBackToInterpreter}, //"crxor", OPTYPE_CR, FL_EVIL}}, + + {150, &JitArm64::FallBackToInterpreter}, //"isync", OPTYPE_ICACHE, FL_EVIL}}, + {0, &JitArm64::FallBackToInterpreter}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, + + {50, &JitArm64::rfi}, //"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1}}, + {18, &JitArm64::Break}, //"rfid", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS}} +}; + + +static GekkoOPTemplate table31[] = +{ + {28, &JitArm64::FallBackToInterpreter}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {60, &JitArm64::FallBackToInterpreter}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {444, &JitArm64::FallBackToInterpreter}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {124, &JitArm64::FallBackToInterpreter}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {316, &JitArm64::FallBackToInterpreter}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {412, &JitArm64::FallBackToInterpreter}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {476, &JitArm64::FallBackToInterpreter}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {284, &JitArm64::FallBackToInterpreter}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {0, &JitArm64::FallBackToInterpreter}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, + {32, &JitArm64::FallBackToInterpreter}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, + {26, &JitArm64::FallBackToInterpreter}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {922, &JitArm64::FallBackToInterpreter}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {954, &JitArm64::FallBackToInterpreter}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {536, &JitArm64::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {792, &JitArm64::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {824, &JitArm64::FallBackToInterpreter}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {24, &JitArm64::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + + {54, &JitArm64::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, + {86, &JitArm64::FallBackToInterpreter}, //"dcbf", OPTYPE_DCACHE, 0, 4}}, + {246, &JitArm64::FallBackToInterpreter}, //"dcbtst", OPTYPE_DCACHE, 0, 1}}, + {278, &JitArm64::FallBackToInterpreter}, //"dcbt", OPTYPE_DCACHE, 0, 1}}, + {470, &JitArm64::FallBackToInterpreter}, //"dcbi", OPTYPE_DCACHE, 0, 4}}, + {758, &JitArm64::FallBackToInterpreter}, //"dcba", OPTYPE_DCACHE, 0, 4}}, + {1014, &JitArm64::FallBackToInterpreter}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, + + //load word + {23, &JitArm64::FallBackToInterpreter}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {55, &JitArm64::FallBackToInterpreter}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load halfword + {279, &JitArm64::FallBackToInterpreter}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {311, &JitArm64::FallBackToInterpreter}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load halfword signextend + {343, &JitArm64::FallBackToInterpreter}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {375, &JitArm64::FallBackToInterpreter}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load byte + {87, &JitArm64::FallBackToInterpreter}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {119, &JitArm64::FallBackToInterpreter}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load byte reverse + {534, &JitArm64::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {790, &JitArm64::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + + // Conditional load/store (Wii SMP) + {150, &JitArm64::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, + {20, &JitArm64::FallBackToInterpreter}, //"lwarx", OPTYPE_LOAD, FL_EVIL | FL_OUT_D | FL_IN_A0B | FL_SET_CR0}}, + + //load string (interpret these) + {533, &JitArm64::FallBackToInterpreter}, //"lswx", OPTYPE_LOAD, FL_EVIL | FL_IN_A | FL_OUT_D}}, + {597, &JitArm64::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, + + //store word + {151, &JitArm64::FallBackToInterpreter}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {183, &JitArm64::FallBackToInterpreter}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //store halfword + {407, &JitArm64::FallBackToInterpreter}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {439, &JitArm64::FallBackToInterpreter}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //store byte + {215, &JitArm64::FallBackToInterpreter}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {247, &JitArm64::FallBackToInterpreter}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //store bytereverse + {662, &JitArm64::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {918, &JitArm64::FallBackToInterpreter}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}}, + + {661, &JitArm64::FallBackToInterpreter}, //"stswx", OPTYPE_STORE, FL_EVIL}}, + {725, &JitArm64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}}, + + // fp load/store + {535, &JitArm64::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, + {567, &JitArm64::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, + {599, &JitArm64::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, + {631, &JitArm64::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, + + {663, &JitArm64::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + {695, &JitArm64::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, + {727, &JitArm64::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + {759, &JitArm64::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, + {983, &JitArm64::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + + {19, &JitArm64::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, + {83, &JitArm64::FallBackToInterpreter}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}}, + {144, &JitArm64::FallBackToInterpreter}, //"mtcrf", OPTYPE_SYSTEM, 0}}, + {146, &JitArm64::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {210, &JitArm64::FallBackToInterpreter}, //"mtsr", OPTYPE_SYSTEM, 0}}, + {242, &JitArm64::FallBackToInterpreter}, //"mtsrin", OPTYPE_SYSTEM, 0}}, + {339, &JitArm64::FallBackToInterpreter}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, + {467, &JitArm64::FallBackToInterpreter}, //"mtspr", OPTYPE_SPR, 0, 2}}, + {371, &JitArm64::FallBackToInterpreter}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, + {512, &JitArm64::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}}, + {595, &JitArm64::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, + {659, &JitArm64::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, + + {4, &JitArm64::Break}, //"tw", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, + {598, &JitArm64::FallBackToInterpreter}, //"sync", OPTYPE_SYSTEM, 0, 2}}, + {982, &JitArm64::icbi}, //"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}}, + + // Unused instructions on GC + {310, &JitArm64::FallBackToInterpreter}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}}, + {438, &JitArm64::FallBackToInterpreter}, //"ecowx", OPTYPE_INTEGER, FL_RC_BIT}}, + {854, &JitArm64::FallBackToInterpreter}, //"eieio", OPTYPE_INTEGER, FL_RC_BIT}}, + {306, &JitArm64::FallBackToInterpreter}, //"tlbie", OPTYPE_SYSTEM, 0}}, + {370, &JitArm64::FallBackToInterpreter}, //"tlbia", OPTYPE_SYSTEM, 0}}, + {566, &JitArm64::FallBackToInterpreter}, //"tlbsync", OPTYPE_SYSTEM, 0}}, +}; + +static GekkoOPTemplate table31_2[] = +{ + {266, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {778, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {10, &JitArm64::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {522, &JitArm64::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {138, &JitArm64::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {650, &JitArm64::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {234, &JitArm64::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {202, &JitArm64::FallBackToInterpreter}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {491, &JitArm64::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, + {1003, &JitArm64::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, + {459, &JitArm64::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, + {971, &JitArm64::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}}, + {75, &JitArm64::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {11, &JitArm64::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {235, &JitArm64::FallBackToInterpreter}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {747, &JitArm64::FallBackToInterpreter}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}}, + {104, &JitArm64::FallBackToInterpreter}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {40, &JitArm64::FallBackToInterpreter}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {552, &JitArm64::FallBackToInterpreter}, //"subox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}}, + {8, &JitArm64::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {520, &JitArm64::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}}, + {136, &JitArm64::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {232, &JitArm64::FallBackToInterpreter}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, + {200, &JitArm64::FallBackToInterpreter}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}}, +}; + +static GekkoOPTemplate table59[] = +{ + {18, &JitArm64::FallBackToInterpreter}, //{"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}}, + {20, &JitArm64::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {21, &JitArm64::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}}, +// {22, &JitArm64::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {24, &JitArm64::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}}, + {25, &JitArm64::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {28, &JitArm64::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {29, &JitArm64::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {30, &JitArm64::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {31, &JitArm64::FallBackToInterpreter}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}}, +}; + +static GekkoOPTemplate table63[] = +{ + {264, &JitArm64::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {32, &JitArm64::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, + {0, &JitArm64::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, + {14, &JitArm64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, + {15, &JitArm64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, + {72, &JitArm64::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}}, + {136, &JitArm64::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}}, + {40, &JitArm64::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}}, + {12, &JitArm64::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}}, + + {64, &JitArm64::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}}, + {583, &JitArm64::FallBackToInterpreter}, //"mffsx", OPTYPE_SYSTEMFP, 0}}, + {70, &JitArm64::FallBackToInterpreter}, //"mtfsb0x", OPTYPE_SYSTEMFP, 0, 2}}, + {38, &JitArm64::FallBackToInterpreter}, //"mtfsb1x", OPTYPE_SYSTEMFP, 0, 2}}, + {134, &JitArm64::FallBackToInterpreter}, //"mtfsfix", OPTYPE_SYSTEMFP, 0, 2}}, + {711, &JitArm64::FallBackToInterpreter}, //"mtfsfx", OPTYPE_SYSTEMFP, 0, 2}}, +}; + +static GekkoOPTemplate table63_2[] = +{ + {18, &JitArm64::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}}, + {20, &JitArm64::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {21, &JitArm64::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}}, + {22, &JitArm64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}}, + {23, &JitArm64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}}, + {25, &JitArm64::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}}, + {26, &JitArm64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}}, + {28, &JitArm64::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {29, &JitArm64::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, + {30, &JitArm64::FallBackToInterpreter}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}}, + {31, &JitArm64::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}}, +}; + + +namespace JitArm64Tables +{ + +void CompileInstruction(PPCAnalyst::CodeOp & op) +{ + JitArm64 *jitarm = (JitArm64 *)jit; + (jitarm->*dynaOpTable[op.inst.OPCD])(op.inst); + GekkoOPInfo *info = op.opinfo; + if (info) + { +#ifdef OPLOG + if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs" + rsplocations.push_back(jit.js.compilerPC); + } +#endif + info->compileCount++; + info->lastUse = jit->js.compilerPC; + } +} + +void InitTables() +{ + // once initialized, tables are read-only + static bool initialized = false; + if (initialized) + return; + + //clear + for (int i = 0; i < 32; i++) + { + dynaOpTable59[i] = &JitArm64::unknown_instruction; + } + + for (int i = 0; i < 1024; i++) + { + dynaOpTable4 [i] = &JitArm64::unknown_instruction; + dynaOpTable19[i] = &JitArm64::unknown_instruction; + dynaOpTable31[i] = &JitArm64::unknown_instruction; + dynaOpTable63[i] = &JitArm64::unknown_instruction; + } + + for (int i = 0; i < (int)(sizeof(primarytable) / sizeof(GekkoOPTemplate)); i++) + { + dynaOpTable[primarytable[i].opcode] = primarytable[i].Inst; + } + + for (int i = 0; i < 32; i++) + { + int fill = i << 5; + for (int j = 0; j < (int)(sizeof(table4_2) / sizeof(GekkoOPTemplate)); j++) + { + int op = fill+table4_2[j].opcode; + dynaOpTable4[op] = table4_2[j].Inst; + } + } + + for (int i = 0; i < 16; i++) + { + int fill = i << 6; + for (int j = 0; j < (int)(sizeof(table4_3) / sizeof(GekkoOPTemplate)); j++) + { + int op = fill+table4_3[j].opcode; + dynaOpTable4[op] = table4_3[j].Inst; + } + } + + for (int i = 0; i < (int)(sizeof(table4) / sizeof(GekkoOPTemplate)); i++) + { + int op = table4[i].opcode; + dynaOpTable4[op] = table4[i].Inst; + } + + for (int i = 0; i < (int)(sizeof(table31) / sizeof(GekkoOPTemplate)); i++) + { + int op = table31[i].opcode; + dynaOpTable31[op] = table31[i].Inst; + } + + for (int i = 0; i < 1; i++) + { + int fill = i << 9; + for (int j = 0; j < (int)(sizeof(table31_2) / sizeof(GekkoOPTemplate)); j++) + { + int op = fill + table31_2[j].opcode; + dynaOpTable31[op] = table31_2[j].Inst; + } + } + + for (int i = 0; i < (int)(sizeof(table19) / sizeof(GekkoOPTemplate)); i++) + { + int op = table19[i].opcode; + dynaOpTable19[op] = table19[i].Inst; + } + + for (int i = 0; i < (int)(sizeof(table59) / sizeof(GekkoOPTemplate)); i++) + { + int op = table59[i].opcode; + dynaOpTable59[op] = table59[i].Inst; + } + + for (int i = 0; i < (int)(sizeof(table63) / sizeof(GekkoOPTemplate)); i++) + { + int op = table63[i].opcode; + dynaOpTable63[op] = table63[i].Inst; + } + + for (int i = 0; i < 32; i++) + { + int fill = i << 5; + for (int j = 0; j < (int)(sizeof(table63_2) / sizeof(GekkoOPTemplate)); j++) + { + int op = fill + table63_2[j].opcode; + dynaOpTable63[op] = table63_2[j].Inst; + } + } + + initialized = true; + +} + +} // namespace diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.h new file mode 100644 index 0000000000..d1788e404e --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.h @@ -0,0 +1,14 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/PPCTables.h" + +namespace JitArm64Tables +{ + void CompileInstruction(PPCAnalyst::CodeOp & op); + void InitTables(); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp new file mode 100644 index 0000000000..2b5e4a5982 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -0,0 +1,80 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/Arm64Emitter.h" + +#include "Core/PowerPC/PowerPC.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" +#include "Core/PowerPC/JitCommon/JitCache.h" + +using namespace Arm64Gen; + +void JitArm64AsmRoutineManager::Generate() +{ + enterCode = GetCodePtr(); + + MOVI2R(X29, (u64)&PowerPC::ppcState); + + dispatcher = GetCodePtr(); + printf("Dispatcher is %p\n", dispatcher); + // Downcount Check + // The result of slice decrementation should be in flags if somebody jumped here + // IMPORTANT - We jump on negative, not carry!!! + FixupBranch bail = B(CC_MI); + + dispatcherNoCheck = GetCodePtr(); + + // This block of code gets the address of the compiled block of code + // It runs though to the compiling portion if it isn't found + LDR(INDEX_UNSIGNED, W28, X29, PPCSTATE_OFF(pc)); // Load the current PC into W28 + BFM(W28, WSP, 3, 2); // Wipe the top 3 bits. Same as PC & JIT_ICACHE_MASK + + MOVI2R(X27, (u64)jit->GetBlockCache()->iCache); + LDR(W27, X27, X28); + + FixupBranch JitBlock = TBNZ(W27, 7); // Test the 7th bit + // Success, it is our Jitblock. + MOVI2R(X30, (u64)jit->GetBlockCache()->GetCodePointers()); + UBFM(X27, X27, 61, 60); // Same as X27 << 3 + LDR(X30, X30, X27); // Load the block address in to R14 + BR(X30); + // No need to jump anywhere after here, the block will go back to dispatcher start + + SetJumpTarget(JitBlock); + + MOVI2R(X30, (u64)&Jit); + BLR(X30); + + B(dispatcherNoCheck); + + SetJumpTarget(bail); + doTiming = GetCodePtr(); + MOVI2R(X30, (u64)&CoreTiming::Advance); + BLR(X30); + + // Does exception checking + LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc)); + STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(npc)); + MOVI2R(X30, (u64)&PowerPC::CheckExceptions); + BLR(X30); + LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(npc)); + STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc)); + + // Check the state pointer to see if we are exiting + // Gets checked on every exception check + MOVI2R(W0, (u64)PowerPC::GetStatePtr()); + LDR(INDEX_UNSIGNED, W0, W0, 0); + FixupBranch Exit = CBNZ(W0); + + B(dispatcher); + + SetJumpTarget(Exit); + + FlushIcache(); +} + +void JitArm64AsmRoutineManager::GenerateCommon() +{ +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.h b/Source/Core/Core/PowerPC/JitArm64/JitAsm.h new file mode 100644 index 0000000000..d2dba320c1 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.h @@ -0,0 +1,29 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include "Common/Arm64Emitter.h" +#include "Core/PowerPC/JitCommon/JitAsmCommon.h" + +class JitArm64AsmRoutineManager : public CommonAsmRoutinesBase, public Arm64Gen::ARM64CodeBlock +{ +private: + void Generate(); + void GenerateCommon(); + +public: + void Init() + { + AllocCodeSpace(8192); + Generate(); + WriteProtect(); + } + + void Shutdown() + { + FreeCodeSpace(); + } +}; + diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h index 3a573f7531..26a916ac65 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h @@ -83,6 +83,11 @@ const int BACKPATCH_SIZE = 5; #define CTX_R14 gregs[REG_R14] #define CTX_R15 gregs[REG_R15] #define CTX_RIP gregs[REG_RIP] + #elif _M_ARM_64 + typedef struct sigcontext SContext; + #define CTX_REG(x) regs[x] + #define CTX_SP sp + #define CTX_PC pc #elif _M_ARM_32 // Add others if required. typedef struct sigcontext SContext; diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index e7c1d1ed2a..ea9b12be70 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -29,6 +29,11 @@ #include "Core/PowerPC/JitArm32/JitArm_Tables.h" #endif +#if _M_ARM_64 +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_Tables.h" +#endif + static bool bFakeVMEM = false; bool bMMU = false; @@ -66,6 +71,13 @@ namespace JitInterface break; } #endif + #if _M_ARM_64 + case 4: + { + ptr = new JitArm64(); + break; + } + #endif default: { PanicAlert("Unrecognizable cpu_core: %d", core); @@ -100,6 +112,13 @@ namespace JitInterface break; } #endif + #if _M_ARM_64 + case 4: + { + JitArm64Tables::InitTables(); + break; + } + #endif default: { PanicAlert("Unrecognizable cpu_core: %d", core); diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.h b/Source/Core/Core/PowerPC/PPCAnalyst.h index 0916e3951e..64d24fdcca 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/PowerPC/PPCAnalyst.h @@ -69,6 +69,11 @@ struct BlockRegStats std::min(firstRead[reg], firstWrite[reg]); } + bool IsUsed(int reg) + { + return (numReads[reg] + numWrites[reg]) > 0; + } + inline void SetInputRegister(int reg, short opindex) { if (firstRead[reg] == -1)