From f107b5e1765b05bb4a8a1f11919e8adf7068a424 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Wed, 5 Feb 2014 01:56:23 +0000
Subject: [PATCH 1/5] [AArch64-emitter] Initial work on an emitter for 64bit ARM.

I've tested a few instruction encodings and expect most to work as long as one
stays away from VFP/SIMD. This implements mostly the instructions needed to
bring up an initial JIT with integer support. Ease-of-use helper functions can
be added in the future; dealing with the raw imms/immr encodings is probably
the worst thing ever.
---
 Source/Core/Common/Arm64Emitter.cpp | 1360 +++++++++++++++++++++++++++
 Source/Core/Common/Arm64Emitter.h   |  538 +++++++++++
 Source/Core/Common/ArmCommon.h      |   26 +
 Source/Core/Common/ArmEmitter.h     |   23 +-
 4 files changed, 1925 insertions(+), 22 deletions(-)
 create mode 100644 Source/Core/Common/Arm64Emitter.cpp
 create mode 100644 Source/Core/Common/Arm64Emitter.h
 create mode 100644 Source/Core/Common/ArmCommon.h
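[Editor's note on the imms/immr pain mentioned above: AArch64 logical-immediate
operands are not stored as a literal mask but as an (N, immr, imms) triple that
describes a run of ones, rotated right by immr, then replicated across the
register in power-of-two sized elements. The sketch below is an editor's
illustration following the ARM ARM DecodeBitMasks() pseudocode, not code from
this patch; it decodes such a triple back into the mask it represents. For
example, DecodeBitmaskImm(0, 0, 0x27, 32) yields 0x00FF00FF.

    #include <cstdint>

    // Decode an AArch64 logical-immediate (N, immr, imms) triple into the
    // bitmask it encodes. regSize is 32 or 64.
    uint64_t DecodeBitmaskImm(unsigned n, unsigned immr, unsigned imms, unsigned regSize)
    {
        // Element size comes from the highest set bit of N:NOT(imms).
        uint32_t pattern = (n << 6) | (~imms & 0x3F);
        int len = 6;
        while (len >= 0 && !((pattern >> len) & 1))
            len--;
        unsigned size = 1u << len;                 // 2, 4, 8, 16, 32 or 64
        unsigned ones = (imms & (size - 1)) + 1;   // set bits per element
        uint64_t element = (ones >= 64) ? ~0ULL : (1ULL << ones) - 1;

        // Rotate the run of ones right by immr within one element...
        unsigned r = immr & (size - 1);
        uint64_t emask = (size >= 64) ? ~0ULL : (1ULL << size) - 1;
        if (r)
            element = ((element >> r) | (element << (size - r))) & emask;

        // ...then replicate the element across the whole register.
        uint64_t result = 0;
        for (unsigned i = 0; i < regSize; i += size)
            result |= element << i;
        return result;
    }
]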
diff --git a/Source/Core/Common/Arm64Emitter.cpp b/Source/Core/Common/Arm64Emitter.cpp
new file mode 100644
index 0000000000..8c4f4f4ff4
--- /dev/null
+++ b/Source/Core/Common/Arm64Emitter.cpp
@@ -0,0 +1,1360 @@
+// Copyright 2013 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include "Arm64Emitter.h"
+
+namespace Arm64Gen
+{
+
+void ARM64XEmitter::SetCodePtr(u8 *ptr)
+{
+	code = ptr;
+	startcode = code;
+	lastCacheFlushEnd = ptr;
+}
+
+const u8 *ARM64XEmitter::GetCodePtr() const
+{
+	return code;
+}
+
+u8 *ARM64XEmitter::GetWritableCodePtr()
+{
+	return code;
+}
+
+void ARM64XEmitter::ReserveCodeSpace(u32 bytes)
+{
+	for (u32 i = 0; i < bytes/4; i++)
+		BRK(0);
+}
+
+const u8 *ARM64XEmitter::AlignCode16()
+{
+	int c = int((u64)code & 15);
+	if (c)
+		ReserveCodeSpace(16-c);
+	return code;
+}
+
+const u8 *ARM64XEmitter::AlignCodePage()
+{
+	int c = int((u64)code & 4095);
+	if (c)
+		ReserveCodeSpace(4096-c);
+	return code;
+}
+
+void ARM64XEmitter::FlushIcache()
+{
+	FlushIcacheSection(lastCacheFlushEnd, code);
+	lastCacheFlushEnd = code;
+}
+
+void ARM64XEmitter::FlushIcacheSection(u8 *start, u8 *end)
+{
+#if defined(IOS)
+	// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
+	sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
+#else
+#ifdef __clang__
+	__clear_cache(start, end);
+#else
+	__builtin___clear_cache(start, end);
+#endif
+#endif
+}
+
+
+// Exception generation
+const u32 ExcEnc[][3] = {
+	{0, 0, 1}, // SVC
+	{0, 0, 2}, // HVC
+	{0, 0, 3}, // SMC
+	{1, 0, 0}, // BRK
+	{2, 0, 0}, // HLT
+	{5, 0, 1}, // DCPS1
+	{5, 0, 2}, // DCPS2
+	{5, 0, 3}, // DCPS3
+};
+
+// Arithmetic generation
+const u32 ArithEnc[] = {
+	0x058, // ADD
+	0x258, // SUB
+};
+
+// Conditional Select
+const u32 CondSelectEnc[][2] = {
+	{0, 0}, // CSEL
+	{0, 1}, // CSINC
+	{1, 0}, // CSINV
+	{1, 1}, // CSNEG
+};
+
+// Data-Processing (1 source)
+const u32 Data1SrcEnc[][2] = {
+	{0, 0}, // RBIT
+	{0, 1}, // REV16
+	{0, 2}, // REV32
+	{0, 3}, // REV64
+	{0, 4}, // CLZ
+	{0, 5}, // CLS
+};
+
+// Data-Processing (2 source)
+const u32 Data2SrcEnc[] = {
+	0x02, // UDIV
+	0x03, // SDIV
+	0x08, // LSLV
+	0x09, // LSRV
+	0x0A, // ASRV
+	0x0B, // RORV
+	0x10, // CRC32B
+	0x11, // CRC32H
+	0x12, // CRC32W
+	0x14, // CRC32CB
+	0x15, // CRC32CH
+	0x16, // CRC32CW
+	0x13, // CRC32X (64bit Only)
+	0x17, // CRC32CX (64bit Only)
+};
+
+// Data-Processing (3 source)
+const u32 Data3SrcEnc[][2] = {
+	{0, 0}, // MADD
+	{0, 1}, // MSUB
+	{1, 0}, // SMADDL (64Bit Only)
+	{1, 1}, // SMSUBL (64Bit Only)
+	{2, 0}, // SMULH (64Bit Only)
+	{5, 0}, // UMADDL (64Bit Only)
+	{5, 1}, // UMSUBL (64Bit Only)
+	{6, 0}, // UMULH (64Bit Only)
+};
+
+// Logical (shifted register)
+const u32 LogicalEnc[][2] = {
+	{0, 0}, // AND
+	{0, 1}, // BIC
+	{1, 0}, // ORR
+	{1, 1}, // ORN
+	{2, 0}, // EOR
+	{2, 1}, // EON
+	{3, 0}, // ANDS
+	{3, 1}, // BICS
+};
+
+// Load/Store Exclusive
+u32 LoadStoreExcEnc[][5] = {
+	{0, 0, 0, 0, 0}, // STXRB
+	{0, 0, 0, 0, 1}, // STLXRB
+	{0, 0, 1, 0, 0}, // LDXRB
+	{0, 0, 1, 0, 1}, // LDAXRB
+	{0, 1, 0, 0, 1}, // STLRB
+	{0, 1, 1, 0, 1}, // LDARB
+	{1, 0, 0, 0, 0}, // STXRH
+	{1, 0, 0, 0, 1}, // STLXRH
+	{1, 0, 1, 0, 0}, // LDXRH
+	{1, 0, 1, 0, 1}, // LDAXRH
+	{1, 1, 0, 0, 1}, // STLRH
+	{1, 1, 1, 0, 1}, // LDARH
+	{2, 0, 0, 0, 0}, // STXR
+	{3, 0, 0, 0, 0}, // (64bit) STXR
+	{2, 0, 0, 0, 1}, // STLXR
+	{3, 0, 0, 0, 1}, // (64bit) STLXR
+	{2, 0, 0, 1, 0}, // STXP
+	{3, 0, 0, 1, 0}, // (64bit) STXP
+	{2, 0, 0, 1, 1}, // STLXP
+	{3, 0, 0, 1, 1}, // (64bit) STLXP
+	{2, 0, 1, 0, 0}, // LDXR
+	{3, 0, 1, 0, 0}, // (64bit) LDXR
+	{2, 0, 1, 0, 1}, // LDAXR
+	{3, 0, 1, 0, 1}, // (64bit) LDAXR
+	{2, 0, 1, 1, 0}, // LDXP
+	{3, 0, 1, 1, 0}, // (64bit) LDXP
+	{2, 0, 1, 1, 1}, // LDAXP
+	{3, 0, 1, 1, 1}, // (64bit) LDAXP
+	{2, 1, 0, 0, 1}, // STLR
+	{3, 1, 0, 0, 1}, // (64bit) STLR
+	{2, 1, 1, 0, 1}, // LDAR
+	{3, 1, 1, 0, 1}, // (64bit) LDAR
+};
+
+void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr)
+{
+	bool b64Bit = is64Bit(Rt);
+	// AArch64 PC-relative offsets are relative to the branch instruction itself.
+	s64 distance = (s64)ptr - (s64)code;
+
+	_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance);
+
+	distance >>= 2;
+
+	_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
+
+	Rt = DecodeReg(Rt);
+	Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \
+		(distance << 5) | Rt);
+}
+
+void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr)
+{
+	bool b64Bit = is64Bit(Rt);
+	s64 distance = (s64)ptr - (s64)code;
+
+	_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance);
+
+	distance >>= 2;
+
+	_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
+
+	Rt = DecodeReg(Rt);
+	Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \
+		(bits << 19) | (distance << 5) | Rt);
+}
+
+void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr)
+{
+	s64 distance = (s64)ptr - s64(code);
+
+	_assert_msg_(DYNA_REC, !(distance & 0x3), "%s: distance must be a multiple of 4: %lx", __FUNCTION__, distance);
+
+	distance >>= 2;
+
+	_assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
+
+	Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF));
+}
+
+void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn)
+{
+	Rn = DecodeReg(Rn);
+	Write32((0x6B << 25) | (opc << 21) | (op2 << 16) | (op3 << 10) | (Rn << 5) | op4);
+}
+
+void ARM64XEmitter::EncodeExceptionInst(u32 instenc, u32 imm)
+{
+	_assert_msg_(DYNA_REC, !(imm & ~0xFFFF), "%s: Exception instruction too large immediate: %d", __FUNCTION__, imm);
+
+	Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) | ExcEnc[instenc][2]);
+}
+
+void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, 
u32 op2, ARM64Reg Rt) +{ + Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) | Rt); +} + +void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option) +{ + Rd = DecodeReg(Rd); + Rn = DecodeReg(Rn); + Rm = DecodeReg(Rm); + Write32((flags << 29) | (ArithEnc[instenc] << 21) | \ + (Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? 1 << 21 : 0) | (Rm << 16) | Option.GetData() | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (op << 30) | (flags << 29) | \ + (0xD0 << 21) | (Rm << 16) | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond) +{ + bool b64Bit = is64Bit(Rn); + + _assert_msg_(DYNA_REC, !(imm & ~0x1F), "%s: too large immediate: %d", __FUNCTION__, imm) + _assert_msg_(DYNA_REC, !(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv) + + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \ + (imm << 16) | (cond << 12) | (1 << 11) | (Rn << 5) | nzcv); +} + +void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond) +{ + bool b64Bit = is64Bit(Rm); + + _assert_msg_(DYNA_REC, !(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv) + + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \ + (Rm << 16) | (cond << 12) | (Rn << 5) | nzcv); +} + +void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | \ + (0xD4 << 21) | (Rm << 16) | (cond << 12) | (CondSelectEnc[instenc][1] << 10) | \ + (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (0x2D6 << 21) | \ + (Data1SrcEnc[instenc][0] << 16) | (Data1SrcEnc[instenc][1] << 10) | \ + (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((b64Bit << 31) | (0x0D6 << 21) | \ + (Rm << 16) | (Data2SrcEnc[instenc] << 10) | \ + (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + bool b64Bit = is64Bit(Rd); + + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Ra = DecodeReg(Ra); + Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | \ + (Rm << 16) | (Data3SrcEnc[instenc][1] << 15) | \ + (Ra << 10) | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + Rd = DecodeReg(Rd); + Rm = DecodeReg(Rm); + Rn = DecodeReg(Rn); + Write32((LogicalEnc[instenc][0] << 29) | (0x50 << 21) | (LogicalEnc[instenc][1] << 21) | \ + Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd); +} + +void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm) +{ + bool b64Bit = is64Bit(Rt); + bool bVec = isVector(Rt); + + 
_assert_msg_(DYNA_REC, !(imm & ~0xFFFFF), "%s: offset too large %d", __FUNCTION__, imm);
+
+	Rt = DecodeReg(Rt);
+	if (b64Bit && bitop != 0x2) // LDRSW(0x2) uses 64bit reg, doesn't have 64bit bit set
+		bitop |= 0x1;
+	Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (imm << 5) | Rt);
+}
+
+void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc,
+		ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt)
+{
+	Rs = DecodeReg(Rs);
+	Rt2 = DecodeReg(Rt2);
+	Rn = DecodeReg(Rn);
+	Rt = DecodeReg(Rt);
+	Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | (LoadStoreExcEnc[instenc][1] << 23) | \
+		(LoadStoreExcEnc[instenc][2] << 22) | (LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | \
+		(LoadStoreExcEnc[instenc][4] << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
+}
+
+void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
+{
+	bool b64Bit = is64Bit(Rt);
+	bool b128Bit = is128Bit(Rt);
+	bool bVec = isVector(Rt);
+
+	if (b128Bit)
+		imm >>= 4;
+	else if (b64Bit)
+		imm >>= 3;
+	else
+		imm >>= 2;
+
+	_assert_msg_(DYNA_REC, !(imm & ~0xF), "%s: offset too large %d", __FUNCTION__, imm);
+
+	u32 opc = 0;
+	if (b128Bit)
+		opc = 2;
+	else if (b64Bit && bVec)
+		opc = 1;
+	else if (b64Bit && !bVec)
+		opc = 2;
+
+	Rt = DecodeReg(Rt);
+	Rt2 = DecodeReg(Rt2);
+	Rn = DecodeReg(Rn);
+	Write32((opc << 30) | (bVec << 26) | (op << 22) | (imm << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
+}
+
+void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
+{
+	bool b64Bit = is64Bit(Rt);
+	bool bVec = isVector(Rt);
+
+	if (b64Bit)
+		imm >>= 3;
+	else
+		imm >>= 2;
+
+	_assert_msg_(DYNA_REC, !(imm & ~0x1FF), "%s: offset too large %d", __FUNCTION__, imm);
+
+	Rt = DecodeReg(Rt);
+	Rn = DecodeReg(Rn);
+	Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 12) | (op2 << 10) | (Rn << 5) | Rt);
+}
+
+void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
+{
+	bool b64Bit = is64Bit(Rt);
+	bool bVec = isVector(Rt);
+
+	if (b64Bit)
+		imm >>= 3;
+	else
+		imm >>= 2;
+
+	_assert_msg_(DYNA_REC, !(imm & ~0xFFF), "%s(INDEX_UNSIGNED): offset too large %d", __FUNCTION__, imm);
+
+	Rt = DecodeReg(Rt);
+	Rn = DecodeReg(Rn);
+	Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 10) | (Rn << 5) | Rt);
+}
+
+void ARM64XEmitter::EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos)
+{
+	bool b64Bit = is64Bit(Rd);
+
+	_assert_msg_(DYNA_REC, !(imm & ~0xFFFF), "%s: immediate out of range: %d", __FUNCTION__, imm);
+
+	Rd = DecodeReg(Rd);
+	Write32((b64Bit << 31) | (op << 29) | (0x25 << 23) | (pos << 21) | (imm << 5) | Rd);
+}
+
+void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
+{
+	bool b64Bit = is64Bit(Rd);
+
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+	Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | \
+		(immr << 16) | (imms << 10) | (Rn << 5) | Rd);
+}
+
+void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend)
+{
+	Rt = DecodeReg(Rt);
+	Rn = DecodeReg(Rn);
+	Rm = DecodeReg(Rm);
+
+	Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (Rm << 16) | \
+		(extend << 13) | (1 << 11) | (Rn << 5) | Rt);
+}
+
+void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd)
+{
+	bool b64Bit = is64Bit(Rd);
+
+	_assert_msg_(DYNA_REC, !(imm & ~0xFFF), "%s: immediate too large: %x", __FUNCTION__, imm);
+
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+	Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | \
+		(imm << 10) | (Rn << 5) | Rd);
+}
+
+void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
+{
+	// Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.
+	// Use Rn to determine bitness here.
+	bool b64Bit = is64Bit(Rn);
+
+	Rd = DecodeReg(Rd);
+	Rn = DecodeReg(Rn);
+
+	Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (b64Bit << 22) | \
+		(immr << 16) | (imms << 10) | (Rn << 5) | Rd);
+}
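[Editor's note: EncodeLogicalImmInst takes the raw immr/imms fields, so callers
must pre-encode the mask themselves, which is exactly the pain the commit
message complains about. A concrete usage sketch, using the decode logic shown
after the commit message (the `emit` object and register choices here are
hypothetical, not from this patch): the 32-bit mask 0x00FF00FF is the 16-bit
element 0x00FF, eight ones with no rotation, replicated twice, so it encodes as
immr = 0, imms = 0b100111.

    emit.ORR(W0, W1, 0, 0x27); // W0 = W1 | 0x00FF00FF (immr=0, imms=0b100111)
    emit.AND(W0, W0, 0, 0x07); // W0 &= 0x000000FF (32bit element, eight ones)
]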
+
+// FixupBranch branching
+void ARM64XEmitter::SetJumpTarget(FixupBranch const &branch)
+{
+	bool Not = false;
+	u32 inst = 0;
+	s64 distance = (s64)(code - branch.ptr);
+	distance >>= 2;
+
+	switch (branch.type)
+	{
+		case 1: // CBNZ
+			Not = true;
+			// fall through
+		case 0: // CBZ
+		{
+			_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
+			bool b64Bit = is64Bit(branch.reg);
+			ARM64Reg reg = DecodeReg(branch.reg);
+			inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | (distance << 5) | reg;
+		}
+		break;
+		case 2: // B (conditional)
+			_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
+			inst = (0x2A << 25) | (distance << 5) | branch.cond;
+		break;
+		case 4: // TBNZ
+			Not = true;
+			// fall through
+		case 3: // TBZ
+		{
+			_assert_msg_(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
+			ARM64Reg reg = DecodeReg(branch.reg);
+			inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (distance << 5) | reg;
+		}
+		break;
+		case 5: // B (unconditional)
+			_assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
+			inst = (0x5 << 26) | distance;
+		break;
+		case 6: // BL (unconditional)
+			_assert_msg_(DYNA_REC, distance >= -0x3FFFFFF && distance < 0x3FFFFFF, "%s(%d): Received too large distance: %lx", __FUNCTION__, branch.type, distance);
+			inst = (0x25 << 26) | distance;
+		break;
+	}
+	*(u32*)branch.ptr = inst;
+}
+
+FixupBranch ARM64XEmitter::CBZ(ARM64Reg Rt)
+{
+	FixupBranch branch;
+	branch.ptr = code;
+	branch.type = 0;
+	branch.reg = Rt;
+	HINT(HINT_NOP);
+	return branch;
+}
+FixupBranch ARM64XEmitter::CBNZ(ARM64Reg Rt)
+{
+	FixupBranch branch;
+	branch.ptr = code;
+	branch.type = 1;
+	branch.reg = Rt;
+	HINT(HINT_NOP);
+	return branch;
+}
+FixupBranch ARM64XEmitter::B(CCFlags cond)
+{
+	FixupBranch branch;
+	branch.ptr = code;
+	branch.type = 2;
+	branch.cond = cond;
+	HINT(HINT_NOP);
+	return branch;
+}
+FixupBranch ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bit)
+{
+	FixupBranch branch;
+	branch.ptr = code;
+	branch.type = 3;
+	branch.reg = Rt;
+	branch.bit = bit;
+	HINT(HINT_NOP);
+	return branch;
+}
+FixupBranch ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bit)
+{
+	FixupBranch branch;
+	branch.ptr = code;
+	branch.type = 4;
+	branch.reg = Rt;
+	branch.bit = bit;
+	HINT(HINT_NOP);
+	return branch;
+}
+FixupBranch ARM64XEmitter::B()
+{
+	FixupBranch branch;
+	branch.ptr = code;
+	branch.type = 5;
+	HINT(HINT_NOP);
+	return branch;
+}
+FixupBranch ARM64XEmitter::BL()
+{
+	FixupBranch branch;
+	branch.ptr = code;
+	branch.type = 6;
+	HINT(HINT_NOP);
+	return branch;
+}
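[Editor's note: the factories above don't emit a real branch. They emit a
NOP-sized placeholder via HINT(HINT_NOP) and record the type, register, and
condition so that SetJumpTarget() can overwrite the placeholder once the target
address is known. This is the usual forward-branch idiom. A usage sketch
(editor's illustration; `emit` is a hypothetical ARM64XEmitter instance):

    FixupBranch skip = emit.CBZ(W0);  // placeholder NOP, patched later
    // ... code that runs only when W0 != 0 ...
    emit.SetJumpTarget(skip);         // rewrite the NOP into CBZ W0, <here>
]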
+
+// Compare and Branch
+void ARM64XEmitter::CBZ(ARM64Reg Rt, const void* ptr)
+{
+	EncodeCompareBranchInst(0, Rt, ptr);
+}
+void ARM64XEmitter::CBNZ(ARM64Reg Rt, const void* ptr)
+{
+	EncodeCompareBranchInst(1, Rt, ptr);
+}
+
+// Conditional Branch
+void ARM64XEmitter::B(CCFlags cond, const void* ptr)
+{
+	s64 distance = (s64)ptr - (s64)code;
+	distance >>= 2;
+
+	_assert_msg_(DYNA_REC, distance >= -0xFFFFF && distance < 0xFFFFF, "%s: Received too large distance: %lx", __FUNCTION__, distance);
+
+	Write32((0x54 << 24) | (distance << 5) | cond);
+}
+
+// Test and Branch
+void ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bits, const void* ptr)
+{
+	EncodeTestBranchInst(0, Rt, bits, ptr);
+}
+void ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bits, const void* ptr)
+{
+	EncodeTestBranchInst(1, Rt, bits, ptr);
+}
+
+// Unconditional Branch
+void ARM64XEmitter::B(const void *ptr)
+{
+	EncodeUnconditionalBranchInst(0, ptr);
+}
+void ARM64XEmitter::BL(const void *ptr)
+{
+	EncodeUnconditionalBranchInst(1, ptr);
+}
+
+// Unconditional Branch (register)
+void ARM64XEmitter::BR(ARM64Reg Rn)
+{
+	EncodeUnconditionalBranchInst(0, 0x1F, 0, 0, Rn);
+}
+void ARM64XEmitter::BLR(ARM64Reg Rn)
+{
+	EncodeUnconditionalBranchInst(1, 0x1F, 0, 0, Rn);
+}
+void ARM64XEmitter::RET(ARM64Reg Rn)
+{
+	EncodeUnconditionalBranchInst(2, 0x1F, 0, 0, Rn);
+}
+void ARM64XEmitter::ERET()
+{
+	EncodeUnconditionalBranchInst(4, 0x1F, 0, 0, SP);
+}
+void ARM64XEmitter::DRPS()
+{
+	EncodeUnconditionalBranchInst(5, 0x1F, 0, 0, SP);
+}
+
+// Exception generation
+void ARM64XEmitter::SVC(u32 imm)
+{
+	EncodeExceptionInst(0, imm);
+}
+
+void ARM64XEmitter::HVC(u32 imm)
+{
+	EncodeExceptionInst(1, imm);
+}
+
+void ARM64XEmitter::SMC(u32 imm)
+{
+	EncodeExceptionInst(2, imm);
+}
+
+void ARM64XEmitter::BRK(u32 imm)
+{
+	EncodeExceptionInst(3, imm);
+}
+
+void ARM64XEmitter::HLT(u32 imm)
+{
+	EncodeExceptionInst(4, imm);
+}
+
+void ARM64XEmitter::DCPS1(u32 imm)
+{
+	EncodeExceptionInst(5, imm);
+}
+
+void ARM64XEmitter::DCPS2(u32 imm)
+{
+	EncodeExceptionInst(6, imm);
+}
+
+void ARM64XEmitter::DCPS3(u32 imm)
+{
+	EncodeExceptionInst(7, imm);
+}
+
+// System
+void ARM64XEmitter::_MSR(PStateField field, u8 imm)
+{
+	u32 op1 = 0, op2 = 0;
+	switch (field)
+	{
+		case FIELD_SPSel:
+			op1 = 0; op2 = 5;
+		break;
+		case FIELD_DAIFSet:
+			op1 = 3; op2 = 6;
+		break;
+		case FIELD_DAIFClr:
+			op1 = 3; op2 = 7;
+		break;
+	}
+	EncodeSystemInst(0, op1, 3, imm, op2, WSP);
+}
+void ARM64XEmitter::HINT(SystemHint op)
+{
+	EncodeSystemInst(0, 3, 2, 0, op, WSP);
+}
+void ARM64XEmitter::CLREX()
+{
+	EncodeSystemInst(0, 3, 3, 0, 2, WSP);
+}
+void ARM64XEmitter::DSB(BarrierType type)
+{
+	EncodeSystemInst(0, 3, 3, type, 4, WSP);
+}
+void ARM64XEmitter::DMB(BarrierType type)
+{
+	EncodeSystemInst(0, 3, 3, type, 5, WSP);
+}
+void ARM64XEmitter::ISB(BarrierType type)
+{
+	EncodeSystemInst(0, 3, 3, type, 6, WSP);
+}
+
+// Add/Subtract (extended register)
+void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	ADD(Rd, Rn, Rm, ArithOption(Rd));
+}
+
+void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
+{
+	EncodeArithmeticInst(0, false, Rd, Rn, Rm, Option);
+}
+
+void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	ADDS(Rd, Rn, Rm, ArithOption(Rd));
+}
+
+void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
+{
+	EncodeArithmeticInst(0, true, Rd, Rn, Rm, Option);
+}
+
+void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	SUB(Rd, Rn, Rm, ArithOption(Rd));
+}
+
+void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
+{
+	EncodeArithmeticInst(1, false, Rd, Rn, Rm, Option);
+}
+
+void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	SUBS(Rd, Rn, Rm, ArithOption(Rd));
+}
+
+void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
+{
+	EncodeArithmeticInst(1, true, Rd, Rn, Rm, Option);
+}
+
+void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm)
+{
+	CMN(Rn, Rm, ArithOption(Rn));
+}
+
+void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
+{
+	EncodeArithmeticInst(0, true, SP, Rn, Rm, Option);
+}
+
+void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm)
+{
+	CMP(Rn, Rm, ArithOption(Rn));
+}
+
+void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
+{
+	EncodeArithmeticInst(1, true, SP, Rn, Rm, Option);
+}
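[Editor's note on the extended-register section above: the three-operand forms
default the option to ArithOption(Rd), which selects UXTX or UXTW with no
shift, while the four-operand forms expose an explicit shifted or extended
register. CMP and CMN hand SP as the destination, which decodes to register 31,
the zero register, so only the flags survive. Usage sketch (editor's
illustration with hypothetical registers):

    emit.ADD(X0, X1, X2);                             // X0 = X1 + X2
    emit.SUB(W0, W1, W2, ArithOption(W2, ST_LSL, 4)); // W0 = W1 - (W2 << 4)
    emit.CMP(W1, W2);                                 // W1 - W2, result to WZR, flags kept
]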
+
+// Add/Subtract (with carry)
+void ARM64XEmitter::ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeArithmeticCarryInst(0, false, Rd, Rn, Rm);
+}
+void ARM64XEmitter::ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeArithmeticCarryInst(0, true, Rd, Rn, Rm);
+}
+void ARM64XEmitter::SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeArithmeticCarryInst(1, false, Rd, Rn, Rm);
+}
+void ARM64XEmitter::SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeArithmeticCarryInst(1, true, Rd, Rn, Rm);
+}
+
+// Conditional Compare (immediate)
+void ARM64XEmitter::CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
+{
+	EncodeCondCompareImmInst(0, Rn, imm, nzcv, cond);
+}
+void ARM64XEmitter::CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
+{
+	EncodeCondCompareImmInst(1, Rn, imm, nzcv, cond);
+}
+
+// Conditional Compare (register)
+void ARM64XEmitter::CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
+{
+	EncodeCondCompareRegInst(0, Rn, Rm, nzcv, cond);
+}
+void ARM64XEmitter::CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
+{
+	EncodeCondCompareRegInst(1, Rn, Rm, nzcv, cond);
+}
+
+// Conditional Select
+void ARM64XEmitter::CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
+{
+	EncodeCondSelectInst(0, Rd, Rn, Rm, cond);
+}
+void ARM64XEmitter::CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
+{
+	EncodeCondSelectInst(1, Rd, Rn, Rm, cond);
+}
+void ARM64XEmitter::CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
+{
+	EncodeCondSelectInst(2, Rd, Rn, Rm, cond);
+}
+void ARM64XEmitter::CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
+{
+	EncodeCondSelectInst(3, Rd, Rn, Rm, cond);
+}
+
+// Data-Processing 1 source
+void ARM64XEmitter::RBIT(ARM64Reg Rd, ARM64Reg Rn)
+{
+	EncodeData1SrcInst(0, Rd, Rn);
+}
+void ARM64XEmitter::REV16(ARM64Reg Rd, ARM64Reg Rn)
+{
+	EncodeData1SrcInst(1, Rd, Rn);
+}
+void ARM64XEmitter::REV32(ARM64Reg Rd, ARM64Reg Rn)
+{
+	EncodeData1SrcInst(2, Rd, Rn);
+}
+void ARM64XEmitter::REV64(ARM64Reg Rd, ARM64Reg Rn)
+{
+	EncodeData1SrcInst(3, Rd, Rn);
+}
+void ARM64XEmitter::CLZ(ARM64Reg Rd, ARM64Reg Rn)
+{
+	EncodeData1SrcInst(4, Rd, Rn);
+}
+void ARM64XEmitter::CLS(ARM64Reg Rd, ARM64Reg Rn)
+{
+	EncodeData1SrcInst(5, Rd, Rn);
+}
+
+// Data-Processing 2 source
+void ARM64XEmitter::UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeData2SrcInst(0, Rd, Rn, Rm);
+}
+void ARM64XEmitter::SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeData2SrcInst(1, Rd, Rn, Rm);
+}
+void ARM64XEmitter::LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeData2SrcInst(2, Rd, Rn, Rm);
+}
+void ARM64XEmitter::LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeData2SrcInst(3, Rd, Rn, Rm);
+}
+void ARM64XEmitter::ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
+{
+	EncodeData2SrcInst(4, Rd, Rn, Rm);
+}
+void ARM64XEmitter::RORV(ARM64Reg Rd, 
ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(5, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(6, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(7, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(8, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(9, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(10, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(11, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(12, Rd, Rn, Rm); +} +void ARM64XEmitter::CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) +{ + EncodeData2SrcInst(13, Rd, Rn, Rm); +} + +// Data-Processing 3 source +void ARM64XEmitter::MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(0, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(1, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(2, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(3, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(4, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(5, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(6, Rd, Rn, Rm, Ra); +} +void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) +{ + EncodeData3SrcInst(7, Rd, Rn, Rm, Ra); +} + +// Logical (shifted register) +void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(0, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(1, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(2, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(3, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(4, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(5, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(6, Rd, Rn, Rm, Shift); +} +void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift) +{ + EncodeLogicalInst(7, Rd, Rn, Rm, Shift); +} + +// Logical (immediate) +void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeLogicalImmInst(0, Rd, Rn, immr, imms); +} +void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeLogicalImmInst(3, Rd, Rn, immr, imms); +} +void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeLogicalImmInst(2, Rd, Rn, immr, imms); +} +void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 
imms) +{ + EncodeLogicalImmInst(1, Rd, Rn, immr, imms); +} +void ARM64XEmitter::TST(ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeLogicalImmInst(3, SP, Rn, immr, imms); +} + +// Add/subtract (immediate) +void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(0, false, shift, imm, Rn, Rd); +} +void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(0, true, shift, imm, Rn, Rd); +} +void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(1, false, shift, imm, Rn, Rd); +} +void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(1, true, shift, imm, Rn, Rd); +} +void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift) +{ + EncodeAddSubImmInst(1, true, shift, imm, Rn, is64Bit(Rn) ? SP : WSP); +} + +// Data Processing (Immediate) +void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos) +{ + EncodeMOVWideInst(2, Rd, imm, pos); +} +void ARM64XEmitter::MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos) +{ + EncodeMOVWideInst(0, Rd, imm, pos); +} +void ARM64XEmitter::MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos) +{ + EncodeMOVWideInst(3, Rd, imm, pos); +} + +// Bitfield move +void ARM64XEmitter::BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeBitfieldMOVInst(1, Rd, Rn, immr, imms); +} +void ARM64XEmitter::SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeBitfieldMOVInst(0, Rd, Rn, immr, imms); +} +void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms) +{ + EncodeBitfieldMOVInst(2, Rd, Rn, immr, imms); +} + +// Load Register (Literal) +void ARM64XEmitter::LDR(ARM64Reg Rt, u32 imm) +{ + EncodeLoadRegisterInst(0, Rt, imm); +} +void ARM64XEmitter::LDRSW(ARM64Reg Rt, u32 imm) +{ + EncodeLoadRegisterInst(2, Rt, imm); +} +void ARM64XEmitter::PRFM(ARM64Reg Rt, u32 imm) +{ + EncodeLoadRegisterInst(3, Rt, imm); +} + +// Load/Store Exclusive +void ARM64XEmitter::STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(0, Rs, SP, Rt, Rn); +} +void ARM64XEmitter::STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(1, Rs, SP, Rt, Rn); +} +void ARM64XEmitter::LDXRB(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(2, SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDAXRB(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(3, SP, SP, Rt, Rn); +} +void ARM64XEmitter::STLRB(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(4, SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDARB(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(5, SP, SP, Rt, Rn); +} +void ARM64XEmitter::STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(6, Rs, SP, Rt, Rn); +} +void ARM64XEmitter::STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(7, Rs, SP, Rt, Rn); +} +void ARM64XEmitter::LDXRH(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(8, SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDAXRH(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(9, SP, SP, Rt, Rn); +} +void ARM64XEmitter::STLRH(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(10, SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDARH(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(11, SP, SP, Rt, Rn); +} +void ARM64XEmitter::STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(12 + is64Bit(Rt), Rs, SP, Rt, Rn); +} +void ARM64XEmitter::STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(14 + is64Bit(Rt), Rs, SP, Rt, Rn); +} +void 
ARM64XEmitter::STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(16 + is64Bit(Rt), Rs, Rt2, Rt, Rn); +} +void ARM64XEmitter::STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(18 + is64Bit(Rt), Rs, Rt2, Rt, Rn); +} +void ARM64XEmitter::LDXR(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(20 + is64Bit(Rt), SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDAXR(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(22 + is64Bit(Rt), SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(24 + is64Bit(Rt), SP, Rt2, Rt, Rn); +} +void ARM64XEmitter::LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(26 + is64Bit(Rt), SP, Rt2, Rt, Rn); +} +void ARM64XEmitter::STLR(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(28 + is64Bit(Rt), SP, SP, Rt, Rn); +} +void ARM64XEmitter::LDAR(ARM64Reg Rt, ARM64Reg Rn) +{ + EncodeLoadStoreExcInst(30 + is64Bit(Rt), SP, SP, Rt, Rn); +} + +// Load/Store no-allocate pair (offset) +void ARM64XEmitter::STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm) +{ + EncodeLoadStorePairedInst(0xA0, Rt, Rt2, Rn, imm); +} +void ARM64XEmitter::LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm) +{ + EncodeLoadStorePairedInst(0xA1, Rt, Rt2, Rn, imm); +} + +// Load/Store register (immediate post-indexed) +// XXX: Most of these support vectors +void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x0E0, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x0E1, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x0E2 : 0x0E3, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x1E0, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x1E1, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x1E2 : 0x1E3, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x3E0 : 0x2E0, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(is64Bit(Rt) ? 
0x3E1 : 0x2E1, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} +void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm) +{ + if (type == INDEX_UNSIGNED) + EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm); + else + EncodeLoadStoreIndexedInst(0x2E2, + type == INDEX_POST ? 1 : 3, Rt, Rn, imm); +} + +// Load/Store register (register offset) +void ARM64XEmitter::STRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(0, 0, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(0, 1, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRSB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + bool b64Bit = is64Bit(Rt); + EncodeLoadStoreRegisterOffset(0, 3 - b64Bit, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::STRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(1, 0, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(1, 1, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRSH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + bool b64Bit = is64Bit(Rt); + EncodeLoadStoreRegisterOffset(1, 3 - b64Bit, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::STR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + bool b64Bit = is64Bit(Rt); + EncodeLoadStoreRegisterOffset(2 + b64Bit, 0, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + bool b64Bit = is64Bit(Rt); + EncodeLoadStoreRegisterOffset(2 + b64Bit, 1, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::LDRSW(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(2, 2, Rt, Rn, Rm, extend); +} +void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend) +{ + EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm, extend); +} + +// Wrapper around MOVZ+MOVK +void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) +{ + unsigned parts = is64Bit(Rd) ? 4 : 2; + bool upload_part[4] = {}; + bool need_movz = false; + + if (!is64Bit(Rd)) + _assert_msg_(DYNA_REC, !(imm >> 32), "%s: immediate doesn't fit in 32bit register: %lx", __FUNCTION__, imm); + + // XXX: Optimize more + // XXX: Support rotating immediates to save instructions + if (optimize) + { + for (unsigned i = 0; i < parts; ++i) + { + if ((imm >> (i * 16)) & 0xFFFF) + upload_part[i] = true; + else + need_movz = true; + } + } + + for (unsigned i = 0; i < parts; ++i) + { + if (need_movz && upload_part[i]) + { + MOVZ(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i); + need_movz = false; + } + else + { + if (upload_part[i] || !optimize) + MOVK(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i); + } + } + +} + +} + diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h new file mode 100644 index 0000000000..e8294b9a25 --- /dev/null +++ b/Source/Core/Common/Arm64Emitter.h @@ -0,0 +1,538 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
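[Editor's note before the header: a quick illustration of the MOVI2R wrapper
that closes out the .cpp above. It walks the constant in 16-bit chunks,
emitting MOVZ for the first nonzero chunk and MOVK for the rest, and skips
zero chunks when optimize is set. One gap worth noting: imm == 0 currently
emits no instruction at all, since every chunk is zero and gets optimized
away, so callers may want an explicit MOVZ(Rd, 0) for that case. Sketch
(editor's illustration; X0 is an arbitrary choice):

    emit.MOVI2R(X0, 0x12345678ULL);
    // expands to, in effect:
    //   MOVZ X0, #0x5678           (LSL #0)
    //   MOVK X0, #0x1234, LSL #16
    // The two upper 16bit chunks are zero, so no MOVK is emitted for them.
]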
+
+#pragma once
+
+#include "Common/ArmCommon.h"
+#include "Common/CodeBlock.h"
+#include "Common/Common.h"
+
+namespace Arm64Gen
+{
+
+// X30 serves a dual purpose as a link register
+// Encoded as
+// Types:
+// 000 - 32bit GPR
+// 001 - 64bit GPR
+// 010 - VFP single precision
+// 100 - VFP double precision
+// 110 - VFP quad precision
+enum ARM64Reg
+{
+	// 32bit registers
+	W0 = 0, W1, W2, W3, W4, W5, W6,
+	W7, W8, W9, W10, W11, W12, W13, W14,
+	W15, W16, W17, W18, W19, W20, W21, W22,
+	W23, W24, W25, W26, W27, W28, W29, W30,
+
+	WSP, // 32bit stack pointer
+
+	// 64bit registers
+	X0 = 0x20, X1, X2, X3, X4, X5, X6,
+	X7, X8, X9, X10, X11, X12, X13, X14,
+	X15, X16, X17, X18, X19, X20, X21, X22,
+	X23, X24, X25, X26, X27, X28, X29, X30,
+
+	SP, // 64bit stack pointer
+
+	// VFP single precision registers
+	S0 = 0x40, S1, S2, S3, S4, S5, S6,
+	S7, S8, S9, S10, S11, S12, S13,
+	S14, S15, S16, S17, S18, S19, S20,
+	S21, S22, S23, S24, S25, S26, S27,
+	S28, S29, S30, S31,
+
+	// VFP Double Precision registers
+	D0 = 0x80, D1, D2, D3, D4, D5, D6, D7,
+	D8, D9, D10, D11, D12, D13, D14, D15,
+	D16, D17, D18, D19, D20, D21, D22, D23,
+	D24, D25, D26, D27, D28, D29, D30, D31,
+
+	// ASIMD Quad-Word registers
+	Q0 = 0xC0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
+	Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
+	Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23,
+	Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31,
+
+	// For PRFM(prefetch memory) encoding
+	// This is encoded in the Rt register
+	// Data preload
+	PLDL1KEEP = 0, PLDL1STRM,
+	PLDL2KEEP, PLDL2STRM,
+	PLDL3KEEP, PLDL3STRM,
+	// Instruction preload
+	PLIL1KEEP = 8, PLIL1STRM,
+	PLIL2KEEP, PLIL2STRM,
+	PLIL3KEEP, PLIL3STRM,
+	// Prepare for store
+	PLTL1KEEP = 16, PLTL1STRM,
+	PLTL2KEEP, PLTL2STRM,
+	PLTL3KEEP, PLTL3STRM,
+
+	INVALID_REG = 0xFFFFFFFF
+};
+
+inline bool is64Bit(ARM64Reg reg) { return reg & 0x20; }
+// Only Q registers have both type bits set; D and S registers set just one.
+inline bool is128Bit(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
+inline bool isVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
+inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
+inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
+
+enum OpType
+{
+	TYPE_IMM = 0,
+	TYPE_REG,
+	TYPE_IMMSREG,
+	TYPE_RSR,
+	TYPE_MEM
+};
+
+enum ShiftType
+{
+	ST_LSL = 0,
+	ST_LSR = 1,
+	ST_ASR = 2,
+	ST_ROR = 3,
+};
+
+enum IndexType
+{
+	INDEX_UNSIGNED,
+	INDEX_POST,
+	INDEX_PRE,
+};
+
+enum ShiftAmount
+{
+	SHIFT_0 = 0,
+	SHIFT_16 = 1,
+	SHIFT_32 = 2,
+	SHIFT_48 = 3,
+};
+
+enum ExtendType
+{
+	EXTEND_UXTW = 2,
+	EXTEND_LSL = 3, // Default for zero shift amount
+	EXTEND_SXTW = 6,
+	EXTEND_SXTX = 7,
+};
+
+struct FixupBranch
+{
+	u8 *ptr;
+	// Type defines
+	// 0 = CBZ (32bit)
+	// 1 = CBNZ (32bit)
+	// 2 = B (conditional)
+	// 3 = TBZ
+	// 4 = TBNZ
+	// 5 = B (unconditional)
+	// 6 = BL (unconditional)
+	u32 type;
+
+	// Used with B.cond
+	CCFlags cond;
+
+	// Used with TBZ/TBNZ
+	u8 bit;
+
+	// Used with Test/Compare and Branch
+	ARM64Reg reg;
+};
+
+enum PStateField
+{
+	FIELD_SPSel = 0,
+	FIELD_DAIFSet,
+	FIELD_DAIFClr,
+};
+
+enum SystemHint
+{
+	HINT_NOP = 0,
+	HINT_YIELD,
+	HINT_WFE,
+	HINT_WFI,
+	HINT_SEV,
+	HINT_SEVL,
+};
+
+enum BarrierType
+{
+	OSHLD = 1,
+	OSHST = 2,
+	OSH   = 3,
+	NSHLD = 5,
+	NSHST = 6,
+	NSH   = 7,
+	ISHLD = 9,
+	ISHST = 10,
+	ISH   = 11,
+	LD    = 13,
+	ST    = 14,
+	SY    = 15,
+};
+
+class ArithOption
+{
+	public:
+		enum WidthSpecifier {
+			WIDTH_DEFAULT,
+			WIDTH_32BIT,
+			WIDTH_64BIT,
+		};
+		enum ExtendSpecifier {
+			EXTEND_UXTB = 0x0,
+			EXTEND_UXTH = 0x1,
+			EXTEND_UXTW = 0x2, /* Also LSL on 32bit width */
+			
EXTEND_UXTX = 0x3, /* Also LSL on 64bit width */ + EXTEND_SXTB = 0x4, + EXTEND_SXTH = 0x5, + EXTEND_SXTW = 0x6, + EXTEND_SXTX = 0x7, + }; + enum TypeSpecifier { + TYPE_EXTENDEDREG, + TYPE_IMM, + TYPE_SHIFTEDREG, + }; + private: + ARM64Reg _destReg; + WidthSpecifier _width; + ExtendSpecifier _extend; + TypeSpecifier _type; + ShiftType _shifttype; + u32 _shift; + public: + ArithOption(ARM64Reg Rd) + { + _destReg = Rd; + _shift = 0; + _type = TYPE_EXTENDEDREG; + if (is64Bit(Rd)) + { + _width = WIDTH_64BIT; + _extend = EXTEND_UXTX; + } + else + { + _width = WIDTH_32BIT; + _extend = EXTEND_UXTW; + } + } + ArithOption(ARM64Reg Rd, ShiftType ShiftType, u32 Shift) + { + _destReg = Rd; + _shift = Shift; + _shifttype = ShiftType; + _type = TYPE_SHIFTEDREG; + if (is64Bit(Rd)) + _width = WIDTH_64BIT; + else + _width = WIDTH_32BIT; + } + TypeSpecifier GetType() + { + return _type; + } + u32 GetData() + { + switch (_type) + { + case TYPE_EXTENDEDREG: + return (_width == WIDTH_64BIT ? (1 << 31) : 0) | + (_extend << 13) | + (_shift << 10); + break; + case TYPE_SHIFTEDREG: + return (_width == WIDTH_64BIT ? (1 << 31) : 0) | + (_shifttype << 22) | + (_shift << 10); + break; + default: + _dbg_assert_msg_(DYNA_REC, false, "Invalid type in GetData"); + break; + } + return 0; + } +}; + +class ARM64XEmitter +{ +private: + u8 *code, *startcode; + u8 *lastCacheFlushEnd; + + void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr); + void EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr); + void EncodeUnconditionalBranchInst(u32 op, const void* ptr); + void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn); + void EncodeExceptionInst(u32 instenc, u32 imm); + void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt); + void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond); + void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond); + void EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + void EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn); + void EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm); + void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt); + void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm); + void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos); + void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend); + void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd); + void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + +protected: + inline void Write32(u32 value) {*(u32*)code = value; 
code+=4;} + +public: + ARM64XEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {} + virtual ~ARM64XEmitter() {} + + void SetCodePtr(u8 *ptr); + void ReserveCodeSpace(u32 bytes); + const u8 *AlignCode16(); + const u8 *AlignCodePage(); + const u8 *GetCodePtr() const; + void FlushIcache(); + void FlushIcacheSection(u8 *start, u8 *end); + u8 *GetWritableCodePtr(); + + // FixupBranch branching + void SetJumpTarget(FixupBranch const &branch); + FixupBranch CBZ(ARM64Reg Rt); + FixupBranch CBNZ(ARM64Reg Rt); + FixupBranch B(CCFlags cond); + FixupBranch TBZ(ARM64Reg Rt, u8 bit); + FixupBranch TBNZ(ARM64Reg Rt, u8 bit); + FixupBranch B(); + FixupBranch BL(); + + // Compare and Branch + void CBZ(ARM64Reg Rt, const void* ptr); + void CBNZ(ARM64Reg Rt, const void* ptr); + + // Conditional Branch + void B(CCFlags cond, const void* ptr); + + // Test and Branch + void TBZ(ARM64Reg Rt, u8 bits, const void* ptr); + void TBNZ(ARM64Reg Rt, u8 bits, const void* ptr); + + // Unconditional Branch + void B(const void *ptr); + void BL(const void *ptr); + + // Unconditional Branch (register) + void BR(ARM64Reg Rn); + void BLR(ARM64Reg Rn); + void RET(ARM64Reg Rn); + void ERET(); + void DRPS(); + + // Exception generation + void SVC(u32 imm); + void HVC(u32 imm); + void SMC(u32 imm); + void BRK(u32 imm); + void HLT(u32 imm); + void DCPS1(u32 imm); + void DCPS2(u32 imm); + void DCPS3(u32 imm); + + // System + void _MSR(PStateField field, u8 imm); + void HINT(SystemHint op); + void CLREX(); + void DSB(BarrierType type); + void DMB(BarrierType type); + void ISB(BarrierType type); + + // Add/Subtract (Extended/Shifted register) + void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void CMN(ARM64Reg Rn, ARM64Reg Rm); + void CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + void CMP(ARM64Reg Rn, ARM64Reg Rm); + void CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option); + + // Add/Subtract (with carry) + void ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + + // Conditional Compare (immediate) + void CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond); + void CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond); + + // Conditional Compare (register) + void CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond); + void CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond); + + // Conditional Select + void CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + void CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond); + + // Data-Processing 1 source + void RBIT(ARM64Reg Rd, ARM64Reg Rn); + void REV16(ARM64Reg Rd, ARM64Reg Rn); + void REV32(ARM64Reg Rd, ARM64Reg Rn); + void REV64(ARM64Reg Rd, ARM64Reg Rn); + void CLZ(ARM64Reg Rd, ARM64Reg Rn); + void CLS(ARM64Reg Rd, ARM64Reg Rn); + + // Data-Processing 2 source + void UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void SDIV(ARM64Reg Rd, ARM64Reg 
Rn, ARM64Reg Rm); + void LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + void CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm); + + // Data-Processing 3 source + void MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra); + + // Logical (shifted register) + void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift); + + // Logical (immediate) + void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void TST(ARM64Reg Rn, u32 immr, u32 imms); + + // Add/subtract (immediate) + void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false); + void ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false); + void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false); + void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false); + void CMP(ARM64Reg Rn, u32 imm, bool shift = false); + + // Data Processing (Immediate) + void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0); + void MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0); + void MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0); + + // Bitfield move + void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); + + // Load Register (Literal) + void LDR(ARM64Reg Rt, u32 imm); + void LDRSW(ARM64Reg Rt, u32 imm); + void PRFM(ARM64Reg Rt, u32 imm); + + // Load/Store Exclusive + void STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void LDXRB(ARM64Reg Rt, ARM64Reg Rn); + void LDAXRB(ARM64Reg Rt, ARM64Reg Rn); + void STLRB(ARM64Reg Rt, ARM64Reg Rn); + void LDARB(ARM64Reg Rt, ARM64Reg Rn); + void STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void LDXRH(ARM64Reg Rt, ARM64Reg Rn); + void LDAXRH(ARM64Reg Rt, ARM64Reg Rn); + void STLRH(ARM64Reg Rt, ARM64Reg 
Rn); + void LDARH(ARM64Reg Rt, ARM64Reg Rn); + void STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn); + void STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn); + void STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn); + void LDXR(ARM64Reg Rt, ARM64Reg Rn); + void LDAXR(ARM64Reg Rt, ARM64Reg Rn); + void LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn); + void LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn); + void STLR(ARM64Reg Rt, ARM64Reg Rn); + void LDAR(ARM64Reg Rt, ARM64Reg Rn); + + // Load/Store no-allocate pair (offset) + void STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm); + void LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm); + + // Load/Store register (immediate indexed) + void STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm); + + // Load/Store register (register offset) + void STRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRSB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void STRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRSH(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void STR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDR(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + void PRFM(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL); + + // Wrapper around MOVZ+MOVK + void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true); +}; + +class ARM64CodeBlock : public CodeBlock +{ +private: + void PoisonMemory() override + { + u32* ptr = (u32*)region; + u32* maxptr = (u32*)region + region_size; + // If our memory isn't a multiple of u32 then this won't write the last remaining bytes with anything + // Less than optimal, but there would be nothing we could do but throw a runtime warning anyway. + // AArch64: 0xD4200000 = BRK 0 + while (ptr < maxptr) + *ptr++ = 0xD4200000; + } +}; +} + diff --git a/Source/Core/Common/ArmCommon.h b/Source/Core/Common/ArmCommon.h new file mode 100644 index 0000000000..ae919d9232 --- /dev/null +++ b/Source/Core/Common/ArmCommon.h @@ -0,0 +1,26 @@ +// Copyright 2013 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
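[Editor's cross-check on ARM64CodeBlock::PoisonMemory above: the 0xD4200000
fill pattern is exactly what EncodeExceptionInst() produces for BRK #0, since
ExcEnc[BRK] is {1, 0, 0}. A small compile-time check, not part of the patch:

    // (0xD4 << 24) | (ExcEnc[BRK][0] << 21) | (0 << 5) | (0 << 2) | 0
    static_assert(((0xD4u << 24) | (1u << 21)) == 0xD4200000u,
                  "PoisonMemory fill matches BRK #0");
]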
+
+enum CCFlags
+{
+	CC_EQ = 0, // Equal
+	CC_NEQ, // Not equal
+	CC_CS, // Carry Set
+	CC_CC, // Carry Clear
+	CC_MI, // Minus (Negative)
+	CC_PL, // Plus
+	CC_VS, // Overflow
+	CC_VC, // No Overflow
+	CC_HI, // Unsigned higher
+	CC_LS, // Unsigned lower or same
+	CC_GE, // Signed greater than or equal
+	CC_LT, // Signed less than
+	CC_GT, // Signed greater than
+	CC_LE, // Signed less than or equal
+	CC_AL, // Always (unconditional) 14
+	CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same
+	CC_LO = CC_CC, // Alias of CC_CC Unsigned lower
+};
+const u32 NO_COND = 0xE0000000;
+
diff --git a/Source/Core/Common/ArmEmitter.h b/Source/Core/Common/ArmEmitter.h
index d33269d090..fbc0f80d67 100644
--- a/Source/Core/Common/ArmEmitter.h
+++ b/Source/Core/Common/ArmEmitter.h
@@ -8,6 +8,7 @@
 #include
 
+#include "Common/ArmCommon.h"
 #include "Common/CodeBlock.h"
 #include "Common/Common.h"
 
@@ -61,28 +62,6 @@ enum ARMReg
 	INVALID_REG = 0xFFFFFFFF
 };
 
-enum CCFlags
-{
-	CC_EQ = 0, // Equal
-	CC_NEQ, // Not equal
-	CC_CS, // Carry Set
-	CC_CC, // Carry Clear
-	CC_MI, // Minus (Negative)
-	CC_PL, // Plus
-	CC_VS, // Overflow
-	CC_VC, // No Overflow
-	CC_HI, // Unsigned higher
-	CC_LS, // Unsigned lower or same
-	CC_GE, // Signed greater than or equal
-	CC_LT, // Signed less than
-	CC_GT, // Signed greater than
-	CC_LE, // Signed less than or equal
-	CC_AL, // Always (unconditional) 14
-	CC_HS = CC_CS, // Alias of CC_CS Unsigned higher or same
-	CC_LO = CC_CC, // Alias of CC_CC Unsigned lower
-};
-const u32 NO_COND = 0xE0000000;
-
 enum ShiftType
 {
 	ST_LSL = 0,

From 2b06257e16ce813fde0abe6b3540918361fe400b Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Thu, 5 Jun 2014 20:33:35 -0500
Subject: [PATCH 2/5] Beginning of the AArch64 JIT branch.

This is the bare minimum required to run a few games on AArch64. Was able
to run starfield and Animal Crossing to the Nintendo logo.

QEMU emulation is literally the slowest thing in the world; it maxes out
at around 12MHz on my Core i7-4930MX.
--- Source/Core/Common/CMakeLists.txt | 30 +- Source/Core/Core/CMakeLists.txt | 13 +- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 287 ++++++++++ Source/Core/Core/PowerPC/JitArm64/Jit.h | 99 ++++ .../Core/PowerPC/JitArm64/JitArm64Cache.cpp | 16 + .../Core/PowerPC/JitArm64/JitArm64Cache.h | 17 + .../Core/PowerPC/JitArm64/JitArm64_Branch.cpp | 254 +++++++++ .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 22 + .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 319 ++++++++++++ .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 242 +++++++++ .../JitArm64/JitArm64_SystemRegisters.cpp | 60 +++ .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 493 ++++++++++++++++++ .../Core/PowerPC/JitArm64/JitArm64_Tables.h | 14 + Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp | 80 +++ Source/Core/Core/PowerPC/JitArm64/JitAsm.h | 29 ++ Source/Core/Core/PowerPC/JitInterface.cpp | 19 + Source/Core/Core/PowerPC/PPCAnalyst.h | 5 + 17 files changed, 1984 insertions(+), 15 deletions(-) create mode 100644 Source/Core/Core/PowerPC/JitArm64/Jit.cpp create mode 100644 Source/Core/Core/PowerPC/JitArm64/Jit.h create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.h create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitAsm.h diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index aebfc3e7ea..cfdf6cd14c 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -31,19 +31,23 @@ set(SRCS BreakPoints.cpp Logging/LogManager.cpp) -if(_M_ARM_32) #ARMv7 - set(SRCS ${SRCS} - ArmCPUDetect.cpp - ArmEmitter.cpp - GenericFPURoundMode.cpp) -elseif(_M_X86) #X86 - set(SRCS ${SRCS} - x64CPUDetect.cpp - x64FPURoundMode.cpp) -else() #Generic - set(SRCS ${SRCS} - GenericFPURoundMode.cpp - x64CPUDetect.cpp) +if(_M_ARM) + if (_M_ARM_32) #ARMv7 + set(SRCS ${SRCS} + ArmEmitter.cpp) + else() #AArch64 + set(SRCS ${SRCS} + Arm64Emitter.cpp) + endif() + set(SRCS ${SRCS} + ArmCPUDetect.cpp + GenericFPURoundMode.cpp) +else() + if(_M_X86) #X86 + set(SRCS ${SRCS} + x64FPURoundMode.cpp) + endif() + set(SRCS ${SRCS} x64CPUDetect.cpp) endif() if(WIN32) set(SRCS ${SRCS} ExtendedTrace.cpp) diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 55669cfb0f..13235e3a73 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -197,8 +197,7 @@ if(_M_X86) PowerPC/JitCommon/JitBackpatch.cpp PowerPC/JitCommon/JitAsmCommon.cpp PowerPC/JitCommon/Jit_Util.cpp) -endif() -if(_M_ARM_32) +elseif(_M_ARM_32) set(SRCS ${SRCS} ArmMemTools.cpp PowerPC/JitArm32/Jit.cpp @@ -217,6 +216,16 @@ if(_M_ARM_32) PowerPC/JitArm32/JitArm_SystemRegisters.cpp PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp ) +elseif(_M_ARM_64) + set(SRCS ${SRCS} + PowerPC/JitArm64/Jit.cpp + PowerPC/JitArm64/JitAsm.cpp + PowerPC/JitArm64/JitArm64Cache.cpp + PowerPC/JitArm64/JitArm64_RegCache.cpp + PowerPC/JitArm64/JitArm64_Branch.cpp + 
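+    # Same per-architecture source split as the JitArm32 block above.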
PowerPC/JitArm64/JitArm64_LoadStore.cpp + PowerPC/JitArm64/JitArm64_SystemRegisters.cpp + PowerPC/JitArm64/JitArm64_Tables.cpp) endif() set(LIBS diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp new file mode 100644 index 0000000000..f8322a5438 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -0,0 +1,287 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" + +#include "Core/PatchEngine.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitArm64_Tables.h" + +using namespace Arm64Gen; + +static int CODE_SIZE = 1024*1024*32; + +void JitArm64::Init() +{ + AllocCodeSpace(CODE_SIZE); + jo.enableBlocklink = true; + gpr.Init(this); + fpr.Init(this); + + blocks.Init(); + asm_routines.Init(); + + code_block.m_stats = &js.st; + code_block.m_gpa = &js.gpa; + code_block.m_fpa = &js.fpa; +} + +void JitArm64::ClearCache() +{ + ClearCodeSpace(); + blocks.Clear(); +} + +void JitArm64::Shutdown() +{ + FreeCodeSpace(); + blocks.Shutdown(); + asm_routines.Shutdown(); +} + +void JitArm64::unknown_instruction(UGeckoInstruction inst) +{ + WARN_LOG(DYNA_REC, "unknown_instruction %08x - Fix me ;)", inst.hex); +} + +void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) +{ + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); + MOVI2R(W0, inst.hex); + MOVI2R(X30, (u64)instr); + BLR(X30); +} + +void JitArm64::HLEFunction(UGeckoInstruction inst) +{ + WARN_LOG(DYNA_REC, "HLEFunction %08x - Fix me ;)", inst.hex); + exit(0); +} + +void JitArm64::DoNothing(UGeckoInstruction inst) +{ + // Yup, just don't do anything. +} + +void JitArm64::Break(UGeckoInstruction inst) +{ + WARN_LOG(DYNA_REC, "Breaking! %08x - Fix me ;)", inst.hex); + exit(0); +} + +void JitArm64::DoDownCount() +{ + ARM64Reg WA = gpr.GetReg(); + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount)); + if (js.downcountAmount < 4096) // We can enlarge this if we used rotations + { + SUBS(WA, WA, js.downcountAmount); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount)); + } + else + { + ARM64Reg WB = gpr.GetReg(); + MOVI2R(WB, js.downcountAmount); + SUBS(WA, WA, WB); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(downcount)); + gpr.Unlock(WB); + } + gpr.Unlock(WA); +} + +// Exits +void JitArm64::WriteExit(u32 destination) +{ + //If nobody has taken care of this yet (this can be removed when all branches are done) + JitBlock *b = js.curBlock; + JitBlock::LinkData linkData; + linkData.exitAddress = destination; + linkData.exitPtrs = GetWritableCodePtr(); + linkData.linkStatus = false; + + DoDownCount(); + + // Link opportunity! + int block; + if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) + { + // It exists! Joy of joy! 
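+		// A direct unconditional branch links the two blocks; B reaches
+		// +/-128MB, which comfortably spans the 32MB code space allocated
+		// in Init().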
+ B(blocks.GetBlock(block)->checkedEntry); + linkData.linkStatus = true; + } + else + { + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + MOVI2R(WA, destination); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + MOVI2R(XA, (u64)asm_routines.dispatcher); + BR(XA); + gpr.Unlock(WA); + } + + b->linkData.push_back(linkData); +} +void JitArm64::WriteExceptionExit(ARM64Reg dest) +{ + STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc)); + STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc)); + gpr.Unlock(dest); + DoDownCount(); + MOVI2R(EncodeRegTo64(dest), (u64)&PowerPC::CheckExceptions); + BLR(EncodeRegTo64(dest)); + LDR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(npc)); + STR(INDEX_UNSIGNED, dest, X29, PPCSTATE_OFF(pc)); + + MOVI2R(EncodeRegTo64(dest), (u64)asm_routines.dispatcher); + BR(EncodeRegTo64(dest)); +} + +void JitArm64::WriteExitDestInR(ARM64Reg Reg) +{ + STR(INDEX_UNSIGNED, Reg, X29, PPCSTATE_OFF(pc)); + gpr.Unlock(Reg); + DoDownCount(); + MOVI2R(EncodeRegTo64(Reg), (u64)asm_routines.dispatcher); + BR(EncodeRegTo64(Reg)); +} + +void STACKALIGN JitArm64::Run() +{ + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); +} + +void JitArm64::SingleStep() +{ + CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; + pExecAddr(); +} + +void STACKALIGN JitArm64::Jit(u32 em_address) +{ + if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || Core::g_CoreStartupParameter.bJITNoBlockCache) + { + ClearCache(); + } + + int block_num = blocks.AllocateBlock(PowerPC::ppcState.pc); + JitBlock *b = blocks.GetBlock(block_num); + const u8* BlockPtr = DoJit(PowerPC::ppcState.pc, &code_buffer, b); + blocks.FinalizeBlock(block_num, jo.enableBlocklink, BlockPtr); +} + +const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b) +{ + int blockSize = code_buf->GetSize(); + + if (Core::g_CoreStartupParameter.bEnableDebugging) + { + // Comment out the following to disable breakpoints (speed-up) + blockSize = 1; + } + + if (em_address == 0) + { + Core::SetState(Core::CORE_PAUSE); + WARN_LOG(DYNA_REC, "ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR); + } + + js.isLastInstruction = false; + js.blockStart = em_address; + js.fifoBytesThisBlock = 0; + js.downcountAmount = 0; + js.skipnext = false; + js.curBlock = b; + + u32 nextPC = em_address; + // Analyze the block, collect all instructions it is made of (including inlining, + // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. 
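+	// Analyze() also returns the address of the first instruction past the
+	// block; it is used below as the fallthrough exit if the block is broken.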
+ nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize); + + PPCAnalyst::CodeOp *ops = code_buf->codebuffer; + + const u8 *start = GetCodePtr(); + b->checkedEntry = start; + b->runCount = 0; + + // Downcount flag check, Only valid for linked blocks + { + FixupBranch bail = B(CC_PL); + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + MOVI2R(WA, js.blockStart); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + MOVI2R(XA, (u64)asm_routines.doTiming); + BR(XA); + gpr.Unlock(WA); + SetJumpTarget(bail); + } + + const u8 *normalEntry = GetCodePtr(); + b->normalEntry = normalEntry; + + gpr.Start(js.gpa); + fpr.Start(js.fpa); + + if (!Core::g_CoreStartupParameter.bEnableDebugging) + js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); + + // Translate instructions + for (u32 i = 0; i < code_block.m_num_instructions; i++) + { + js.compilerPC = ops[i].address; + js.op = &ops[i]; + js.instructionNumber = i; + const GekkoOPInfo *opinfo = ops[i].opinfo; + js.downcountAmount += opinfo->numCycles; + + if (i == (code_block.m_num_instructions - 1)) + { + // WARNING - cmp->branch merging will screw this up. + js.isLastInstruction = true; + js.next_inst = 0; + } + else + { + // help peephole optimizations + js.next_inst = ops[i + 1].inst; + js.next_compilerPC = ops[i + 1].address; + } + if (!ops[i].skip) + { + if (js.memcheck && (opinfo->flags & FL_USE_FPU)) + { + // Don't do this yet + BRK(0x7777); + } + + JitArm64Tables::CompileInstruction(ops[i]); + + if (js.memcheck && (opinfo->flags & FL_LOADSTORE)) + { + // Don't do this yet + BRK(0x666); + } + } + } + + if (code_block.m_memory_exception) + BRK(0x500); + + if (code_block.m_broken) + { + printf("Broken Block going to 0x%08x\n", nextPC); + WriteExit(nextPC); + } + + b->codeSize = (u32)(GetCodePtr() - normalEntry); + b->originalSize = code_block.m_num_instructions; + FlushIcache(); + return start; +} diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h new file mode 100644 index 0000000000..a1ff3e525d --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -0,0 +1,99 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
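A note on the BRK sentinels in DoJit above: BRK's 16-bit immediate is recoverable from the faulting opcode, so 0x7777 (FPU op under memcheck), 0x666 (load/store under memcheck) and 0x500 (memory exception) act as debugging breadcrumbs. A sketch, where opcode stands for the faulting instruction word:

    u32 brk_imm16 = (opcode >> 5) & 0xFFFF; // opcode == 0xD4200000 | (imm16 << 5)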
+ +#pragma once + +#include "Common/Arm64Emitter.h" + +#include "Core/PowerPC/CPUCoreBase.h" +#include "Core/PowerPC/PPCAnalyst.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitArm64Cache.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" +#include "Core/PowerPC/JitCommon/JitBase.h" + +#define PPCSTATE_OFF(elem) ((s64)&PowerPC::ppcState.elem - (s64)&PowerPC::ppcState) +using namespace Arm64Gen; +class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock +{ +public: + JitArm64() : code_buffer(32000) {} + ~JitArm64() {} + + void Init(); + void Shutdown(); + + JitBaseBlockCache *GetBlockCache() { return &blocks; } + + const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) { return NULL; } + + bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); } + + void ClearCache(); + + CommonAsmRoutinesBase *GetAsmRoutines() + { + return &asm_routines; + } + + void Run(); + void SingleStep(); + + void Jit(u32 em_address); + + const char *GetName() + { + return "JITARM64"; + } + + // OPCODES + void unknown_instruction(UGeckoInstruction inst); + void FallBackToInterpreter(UGeckoInstruction inst); + void DoNothing(UGeckoInstruction inst); + void HLEFunction(UGeckoInstruction inst); + + void DynaRunTable4(UGeckoInstruction inst); + void DynaRunTable19(UGeckoInstruction inst); + void DynaRunTable31(UGeckoInstruction inst); + void DynaRunTable59(UGeckoInstruction inst); + void DynaRunTable63(UGeckoInstruction inst); + + // Force break + void Break(UGeckoInstruction inst); + + // Branch + void sc(UGeckoInstruction inst); + void rfi(UGeckoInstruction inst); + void bx(UGeckoInstruction inst); + void bcx(UGeckoInstruction inst); + void bcctrx(UGeckoInstruction inst); + void bclrx(UGeckoInstruction inst); + + // System Registers + void mtmsr(UGeckoInstruction inst); + + // LoadStore + void icbi(UGeckoInstruction inst); + +private: + Arm64GPRCache gpr; + Arm64FPRCache fpr; + + JitArm64BlockCache blocks; + JitArm64AsmRoutineManager asm_routines; + + PPCAnalyst::CodeBuffer code_buffer; + + const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b); + + void DoDownCount(); + + // Exits + void WriteExit(u32 destination); + void WriteExceptionExit(ARM64Reg dest); + void WriteExitDestInR(ARM64Reg dest); + + FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); +}; + diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp new file mode 100644 index 0000000000..55c319a7cc --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.cpp @@ -0,0 +1,16 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Core/PowerPC/JitInterface.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64Cache.h" + +void JitArm64BlockCache::WriteLinkBlock(u8* location, const u8* address) +{ +} + +void JitArm64BlockCache::WriteDestroyBlock(const u8* location, u32 address) +{ +} + diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h new file mode 100644 index 0000000000..3868751597 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64Cache.h @@ -0,0 +1,17 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
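For reference, the PPCSTATE_OFF macro in Jit.h above computes a member's byte offset inside ppcState so guest state can be addressed off X29, the dedicated ppcState pointer. The pattern used throughout this patch:

    // X29 holds &ppcState; INDEX_UNSIGNED is the plain unsigned-offset form.
    LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(gpr[5])); // WA = ppcState.gpr[5]
    STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc));     // ppcState.pc = WA

Note that the unsigned-offset form scales its 12-bit immediate by the access size, so the offset has to stay aligned and in range.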
+ +#pragma once + +#include "Core/PowerPC/JitCommon/JitCache.h" + + +typedef void (*CompiledCode)(); + +class JitArm64BlockCache : public JitBaseBlockCache +{ +private: + void WriteLinkBlock(u8* location, const u8* address); + void WriteDestroyBlock(const u8* location, u32 address); +}; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp new file mode 100644 index 0000000000..037e995718 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp @@ -0,0 +1,254 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" + +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/PowerPC/PowerPC.h" +#include "Core/PowerPC/PPCTables.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" + +using namespace Arm64Gen; + +void JitArm64::sc(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + ARM64Reg WA = gpr.GetReg(); + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + ORR(WA, WA, 31, 0); // Same as WA | EXCEPTION_SYSCALL + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(Exceptions)); + + MOVI2R(WA, js.compilerPC + 4); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + + // WA is unlocked in this function + WriteExceptionExit(WA); +} + +void JitArm64::rfi(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + // See Interpreter rfi for details + const u32 mask = 0x87C0FFFF; + const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] + // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; + // R0 = MSR location + // R1 = MSR contents + // R2 = Mask + // R3 = Mask + ARM64Reg WA = gpr.GetReg(); + ARM64Reg WB = gpr.GetReg(); + ARM64Reg WC = gpr.GetReg(); + + MOVI2R(WA, (~mask) & clearMSR13); + MOVI2R(WB, mask & clearMSR13); + + LDR(INDEX_UNSIGNED, WC, X29, PPCSTATE_OFF(msr)); + + AND(WC, WC, WB, ArithOption(WC, ST_LSL, 0)); // rD = Masked MSR + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here + + AND(WA, WA, WB, ArithOption(WA, ST_LSL, 0)); // rB contains masked SRR1 here + ORR(WA, WA, WC, ArithOption(WA, ST_LSL, 0)); // rB = Masked MSR OR masked SRR1 + + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(msr)); // STR rB in to rA + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_SRR0])); + gpr.Unlock(WB, WC); + + // WA is unlocked in this function + WriteExceptionExit(WA); +} + +void JitArm64::bx(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + u32 destination; + if (inst.AA) + destination = SignExt26(inst.LI << 2); + else + destination = js.compilerPC + SignExt26(inst.LI << 2); + + if (inst.LK) + { + u32 Jumpto = js.compilerPC + 4; + ARM64Reg WA = gpr.GetReg(); + MOVI2R(WA, Jumpto); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); + gpr.Unlock(WA); + } + + if (destination == js.compilerPC) + { + // make idle loops go faster + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + + MOVI2R(XA, (u64)&CoreTiming::Idle); + BLR(XA); + MOVI2R(WA, js.compilerPC); + WriteExceptionExit(WA); + } + + WriteExit(destination); +} + +void JitArm64::bcx(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + 
fpr.Flush(FlushMode::FLUSH_ALL); + + ARM64Reg WA = gpr.GetReg(); + FixupBranch pCTRDontBranch; + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR + { + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + SUBS(WA, WA, 1); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + + if (inst.BO & BO_BRANCH_IF_CTR_0) + pCTRDontBranch = B(CC_NEQ); + else + pCTRDontBranch = B(CC_EQ); + } + + FixupBranch pConditionDontBranch; + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit + { + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); + } + + if (inst.LK) + { + u32 Jumpto = js.compilerPC + 4; + MOVI2R(WA, Jumpto); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); + } + gpr.Unlock(WA); + + u32 destination; + if (inst.AA) + destination = SignExt16(inst.BD << 2); + else + destination = js.compilerPC + SignExt16(inst.BD << 2); + WriteExit(destination); + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) + SetJumpTarget( pConditionDontBranch ); + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) + SetJumpTarget( pCTRDontBranch ); + + WriteExit(js.compilerPC + 4); +} + +void JitArm64::bcctrx(UGeckoInstruction inst) +{ + INSTRUCTION_START + + // bcctrx doesn't decrement and/or test CTR + _assert_msg_(DYNA_REC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!"); + + if (inst.BO_2 & BO_DONT_CHECK_CONDITION) + { + // BO_2 == 1z1zz -> b always + + //NPC = CTR & 0xfffffffc; + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + if (inst.LK_3) + { + ARM64Reg WB = gpr.GetReg(); + u32 Jumpto = js.compilerPC + 4; + MOVI2R(WB, Jumpto); + STR(INDEX_UNSIGNED, WB, X29, PPCSTATE_OFF(spr[SPR_LR])); + gpr.Unlock(WB); + } + + ARM64Reg WA = gpr.GetReg(); + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. + WriteExitDestInR(WA); + } + else + { + // Rare condition seen in (just some versions of?) Nintendo's NES Emulator + // BO_2 == 001zy -> b if false + // BO_2 == 011zy -> b if true + _assert_msg_(DYNA_REC, false, "Haven't implemented rare form of bcctrx yet"); + } +} + +void JitArm64::bclrx(UGeckoInstruction inst) +{ + INSTRUCTION_START + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + ARM64Reg WA = gpr.GetReg(); + FixupBranch pCTRDontBranch; + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR + { + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + SUBS(WA, WA, 1); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_CTR])); + + if (inst.BO & BO_BRANCH_IF_CTR_0) + pCTRDontBranch = B(CC_NEQ); + else + pCTRDontBranch = B(CC_EQ); + } + + FixupBranch pConditionDontBranch; + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit + { + pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), + !(inst.BO_2 & BO_BRANCH_IF_TRUE)); + } + + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR])); + AND(WA, WA, 30, 29); // Wipe the bottom 2 bits. 
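+	// (immr, imms) = (30, 29) is a raw AArch64 logical immediate: imms = 29
+	// selects a 32-bit element with 29 + 1 = 30 ones (0x3FFFFFFF), and
+	// immr = 30 rotates that right by 30 to give 0xFFFFFFFC.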
+ + if (inst.LK) + { + ARM64Reg WB = gpr.GetReg(); + u32 Jumpto = js.compilerPC + 4; + MOVI2R(WB, Jumpto); + STR(INDEX_UNSIGNED, WB, X29, PPCSTATE_OFF(spr[SPR_LR])); + gpr.Unlock(WB); + } + + WriteExitDestInR(WA); + + if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) + SetJumpTarget( pConditionDontBranch ); + if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) + SetJumpTarget( pCTRDontBranch ); + + WriteExit(js.compilerPC + 4); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp new file mode 100644 index 0000000000..783a9ce78b --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -0,0 +1,22 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" + +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/PowerPC/PowerPC.h" +#include "Core/PowerPC/PPCTables.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" + +using namespace Arm64Gen; + +void JitArm64::icbi(UGeckoInstruction inst) +{ + FallBackToInterpreter(inst); + WriteExit(js.compilerPC + 4); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp new file mode 100644 index 0000000000..a165828037 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -0,0 +1,319 @@ +// copyright 2014 dolphin emulator project +// licensed under gplv2 +// refer to the license.txt file included. + +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" + +using namespace Arm64Gen; + +void Arm64RegCache::Init(ARM64XEmitter *emitter) +{ + m_emit = emitter; + GetAllocationOrder(); +} + +ARM64Reg Arm64RegCache::GetReg(void) +{ + for (auto& it : m_host_registers) + { + if (!it.IsLocked()) + { + it.Lock(); + return it.GetReg(); + } + } + // Holy cow, how did you run out of registers? + // We can't return anything reasonable in this case. 
Return INVALID_REG and watch the failure happen + _assert_msg_(DYNA_REC, false, "All available registers are locked dumb dumb"); + return INVALID_REG; +} + +void Arm64RegCache::LockRegister(ARM64Reg host_reg) +{ + auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg); + if (reg == m_host_registers.end()) + _assert_msg_(DYNA_REC, false, "Don't try locking a register that isn't in the cache"); + _assert_msg_(DYNA_REC, !reg->IsLocked(), "This register is already locked"); + reg->Lock(); +} + +void Arm64RegCache::UnlockRegister(ARM64Reg host_reg) +{ + auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg); + if (reg == m_host_registers.end()) + _assert_msg_(DYNA_REC, false, "Don't try unlocking a register that isn't in the cache"); + _assert_msg_(DYNA_REC, reg->IsLocked(), "This register is already unlocked"); + reg->Unlock(); +} + +// GPR Cache +void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats &stats) +{ + // To make this technique easy, let's just work on pairs of even/odd registers + // We could do simple odd/even as well to get a few spare temporary registers + // but it isn't really needed, we aren't starved for registers + for (int reg = 0; reg < 32; reg += 2) + { + u32 regs_used = (stats.IsUsed(reg) << 1) | stats.IsUsed(reg + 1); + switch (regs_used) + { + case 0x02: // Reg+0 used + { + ARM64Reg host_reg = GetReg(); + m_guest_registers[reg].LoadToReg(host_reg); + m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[reg])); + } + break; + case 0x01: // Reg+1 used + { + ARM64Reg host_reg = GetReg(); + m_guest_registers[reg + 1].LoadToReg(host_reg); + m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[reg + 1])); + } + break; + case 0x03: // Both registers used + { + // Get a 64bit host register + ARM64Reg host_reg = EncodeRegTo64(GetReg()); + m_guest_registers[reg].LoadToAway(host_reg, REG_LOW); + m_guest_registers[reg + 1].LoadToAway(host_reg, REG_HIGH); + + // host_reg is 64bit here.
+ // It'll load both guest_registers in one LDR + m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[reg])); + } + break; + case 0x00: // Neither used + default: + break; + } + } +} + +bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg) +{ + static std::vector callee_regs = + { + X28, X27, X26, X25, X24, X23, X22, X21, X20, + X19, INVALID_REG, + }; + return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end(); +} + +void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) +{ + for (int i = 0; i < 32; ++i) + { + bool flush = true; + if (mode == FLUSH_INTERPRETER) + { + if (!(op->regsOut[0] == i || + op->regsOut[1] == i || + op->regsIn[0] == i || + op->regsIn[1] == i || + op->regsIn[2] == i)) + { + // This interpreted instruction doesn't use this register + flush = false; + } + } + + if (m_guest_registers[i].GetType() == REG_REG) + { + // Has to be flushed if it isn't in a callee saved register + ARM64Reg host_reg = m_guest_registers[i].GetReg(); + if (flush || !IsCalleeSaved(host_reg)) + { + m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i])); + Unlock(host_reg); + + m_guest_registers[i].Flush(); + } + } + else if (m_guest_registers[i].GetType() == REG_IMM) + { + if (flush) + { + ARM64Reg host_reg = GetReg(); + + m_emit->MOVI2R(host_reg, m_guest_registers[i].GetImm()); + m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i])); + + Unlock(host_reg); + + m_guest_registers[i].Flush(); + } + } + else if (m_guest_registers[i].GetType() == REG_AWAY) + { + // We are away, that means that this register and the next are stored in a single 64bit register + // There is a very good chance that both the registers are out in some "temp" register + bool flush_2 = true; + if (mode == FLUSH_INTERPRETER) + { + if (!(op->regsOut[0] == (i + 1) || + op->regsOut[1] == (i + 1) || + op->regsIn[0] == (i + 1) || + op->regsIn[1] == (i + 1) || + op->regsIn[2] == (i + 1))) + { + // This interpreted instruction doesn't use this register + flush_2 = false; + } + } + + ARM64Reg host_reg = m_guest_registers[i].GetAwayReg(); + ARM64Reg host_reg_1 = m_guest_registers[i].GetReg(); + ARM64Reg host_reg_2 = m_guest_registers[i + 1].GetReg(); + // Flush if either of these shared registers are used. 
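+			// host_reg is the shared 64-bit register holding both guest
+			// registers; host_reg_1 and host_reg_2 are their 32-bit
+			// split-outs, still INVALID_REG unless R() extracted them.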
+ if (flush || + flush_2 || + !IsCalleeSaved(host_reg) || + !IsCalleeSaved(host_reg_1) || + !IsCalleeSaved(host_reg_2)) + { + + if (host_reg_1 == INVALID_REG) + { + // We never loaded this register + // We've got to test the state of our shared register + // Currently it is always reg+1 + if (host_reg_2 == INVALID_REG) + { + // We didn't load either of these registers + // This can happen in cases where we had to flush register state + // or if we hit an interpreted instruction before we could use it + // Dump the whole thing in one go and flush both registers + + // 64bit host register will store 2 32bit store registers in one go + m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[i])); + } + else + { + // Alright, bottom register isn't used, but top one is + // Only store the top one + m_emit->STR(INDEX_UNSIGNED, host_reg_2, X29, PPCSTATE_OFF(gpr[i + 1])); + Unlock(host_reg_2); + } + } + else + { + m_emit->STR(INDEX_UNSIGNED, host_reg_1, X29, PPCSTATE_OFF(gpr[i])); + Unlock(host_reg_1); + } + // Flush both registers + m_guest_registers[i].Flush(); + m_guest_registers[i + 1].Flush(); + Unlock(DecodeReg(host_reg)); + } + // Skip the next register since we've handled it here + ++i; + } + } +} + +ARM64Reg Arm64GPRCache::R(u32 preg) +{ + OpArg& reg = m_guest_registers[preg]; + switch (reg.GetType()) + { + case REG_REG: // already in a reg + return reg.GetReg(); + break; + case REG_IMM: // Is an immediate + { + ARM64Reg host_reg = GetReg(); + m_emit->MOVI2R(host_reg, reg.GetImm()); + } + break; + case REG_AWAY: // Register is away in a shared register + { + // Let's do the voodoo that we dodo + if (reg.GetReg() == INVALID_REG) + { + // Alright, we need to move to a valid location + ARM64Reg host_reg = GetReg(); + reg.LoadAwayToReg(host_reg); + + // Alright, we need to extract from our away register + // To our new 32bit register + if (reg.GetAwayLocation() == REG_LOW) + { + // We are in the low bits + // Just move it over to the low bits of the new register + m_emit->UBFM(EncodeRegTo64(host_reg), reg.GetAwayReg(), 0, 31); + } + else + { + // We are in the high bits + m_emit->UBFM(EncodeRegTo64(host_reg), reg.GetAwayReg(), 32, 63); + } + } + else + { + // We've already moved to a valid place to work on + return reg.GetReg(); + } + } + break; + case REG_NOTLOADED: // Register isn't loaded at /all/ + { + // This is kind of annoying, we shouldn't have gotten here + // This can happen with instructions that use multiple registers(eg lmw) + // The PPCAnalyst needs to be modified to handle these cases + _dbg_assert_msg_(DYNA_REC, false, "Hit REG_NOTLOADED type oparg. 
Fix the PPCAnalyst"); + ARM64Reg host_reg = GetReg(); + reg.LoadToReg(host_reg); + m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg])); + return host_reg; + } + break; + default: + _dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!"); + break; + } + // We've got an issue if we end up here + return INVALID_REG; +} + +void Arm64GPRCache::GetAllocationOrder(void) +{ + // Callee saved registers first in hopes that we will keep everything stored there first + const std::vector allocation_order = + { + W28, W27, W26, W25, W24, W23, W22, W21, W20, + W19, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, + W10, W11, W12, W13, W14, W15, W16, W17, W18, + W30, + }; + + for (ARM64Reg reg : allocation_order) + m_host_registers.push_back(HostReg(reg)); +} + +// FPR Cache +void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op) +{ + // XXX: Flush our stuff +} + +ARM64Reg Arm64FPRCache::R(u32 preg) +{ + // XXX: return a host reg holding a guest register +} + +void Arm64FPRCache::GetAllocationOrder(void) +{ + const std::vector allocation_order = + { + D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, + D11, D12, D13, D14, D15, D16, D17, D18, D19, + D20, D21, D22, D23, D24, D25, D26, D27, D28, + D29, D30, D31, + }; + + for (ARM64Reg reg : allocation_order) + m_host_registers.push_back(HostReg(reg)); +} + diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h new file mode 100644 index 0000000000..26dd213f56 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -0,0 +1,242 @@ +// copyright 2014 dolphin emulator project +// licensed under gplv2 +// refer to the license.txt file included. + +#pragma once + +#include + +#include "Common/Arm64Emitter.h" +#include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/PPCAnalyst.h" + +// Dedicated host registers +// X29 = ppcState pointer +using namespace Arm64Gen; + +enum RegType +{ + REG_NOTLOADED = 0, + REG_REG, // Reg type is register + REG_IMM, // Reg is really a IMM + REG_AWAY, // Reg is away +}; +enum RegLocation +{ + REG_LOW = 0, + REG_HIGH, +}; + +enum FlushMode +{ + // Flushes all registers, no exceptions + FLUSH_ALL = 0, + // Flushes registers in a conditional branch + // Doesn't wipe the state of the registers from the cache + FLUSH_MAINTAIN_STATE, + // Flushes only the required registers for an interpreter call + FLUSH_INTERPRETER, +}; + +class OpArg +{ +public: + OpArg() + { + m_type = REG_NOTLOADED; + m_reg = INVALID_REG; + m_value = 0; + } + + RegType GetType() + { + return m_type; + } + + ARM64Reg GetReg() + { + return m_reg; + } + ARM64Reg GetAwayReg() + { + return m_away_reg; + } + RegLocation GetAwayLocation() + { + return m_away_location; + } + u32 GetImm() + { + return m_value; + } + void LoadToReg(ARM64Reg reg) + { + m_type = REG_REG; + m_reg = reg; + } + void LoadToAway(ARM64Reg reg, RegLocation location) + { + m_type = REG_AWAY; + m_reg = INVALID_REG; + m_away_reg = reg; + m_away_location = location; + } + void LoadAwayToReg(ARM64Reg reg) + { + // We are still an away type + // We just are also in another register + m_reg = reg; + } + void LoadToImm(u32 imm) + { + m_type = REG_IMM; + m_value = imm; + } + void Flush() + { + m_type = REG_NOTLOADED; + } + +private: + // For REG_REG + RegType m_type; // store type + ARM64Reg m_reg; // host register we are in + + // For REG_AWAY + // Host register that we are away in + // This is a 64bit register + ARM64Reg m_away_reg; + RegLocation m_away_location; + + // For REG_IMM + u32 m_value; // IMM value 
+}; + +class HostReg +{ +public: + HostReg() : m_reg(INVALID_REG), m_locked(false) {} + HostReg(ARM64Reg reg) : m_reg(reg), m_locked(false) {} + bool IsLocked(void) { return m_locked; } + void Lock(void) { m_locked = true; } + void Unlock(void) { m_locked = false; } + ARM64Reg GetReg(void) { return m_reg; } + + bool operator==(const ARM64Reg& reg) + { + return reg == m_reg; + } + +private: + ARM64Reg m_reg; + bool m_locked; +}; + +class Arm64RegCache +{ +public: + Arm64RegCache(void) : m_emit(nullptr), m_reg_stats(nullptr) {}; + virtual ~Arm64RegCache() {}; + + void Init(ARM64XEmitter *emitter); + + virtual void Start(PPCAnalyst::BlockRegStats &stats) {} + + // Flushes the register cache in different ways depending on the mode + virtual void Flush(FlushMode mode, PPCAnalyst::CodeOp* op) = 0; + + // Returns a guest register inside of a host register + // Will dump an immediate to the host register as well + virtual ARM64Reg R(u32 reg) = 0; + + // Returns a temporary register for use + // Requires unlocking after done + ARM64Reg GetReg(void); + + // Locks a register so a cache cannot use it + // Useful for function calls + template + void Lock(Args... args) + { + for (T reg : {args...}) + { + LockRegister(reg); + } + } + + // Unlocks a locked register + // Unlocks registers locked with both GetReg and LockRegister + template + void Unlock(Args... args) + { + for (T reg : {args...}) + { + UnlockRegister(reg); + } + } + +protected: + // Get the order of the host registers + virtual void GetAllocationOrder(void) = 0; + + // Lock a register + void LockRegister(ARM64Reg host_reg); + + // Unlock a register + void UnlockRegister(ARM64Reg host_reg); + + // Code emitter + ARM64XEmitter *m_emit; + + // Host side registers that hold the host registers in order of use + std::vector m_host_registers; + + // Register stats for the current block + PPCAnalyst::BlockRegStats *m_reg_stats; +}; + +class Arm64GPRCache : public Arm64RegCache +{ +public: + ~Arm64GPRCache() {} + + void Start(PPCAnalyst::BlockRegStats &stats); + + // Flushes the register cache in different ways depending on the mode + void Flush(FlushMode mode, PPCAnalyst::CodeOp* op = nullptr); + + // Returns a guest register inside of a host register + // Will dump an immediate to the host register as well + ARM64Reg R(u32 preg); + +protected: + // Get the order of the host registers + void GetAllocationOrder(void); + + // Our guest GPRs + // PowerPC has 32 GPRs + OpArg m_guest_registers[32]; + +private: + bool IsCalleeSaved(ARM64Reg reg); +}; + +class Arm64FPRCache : public Arm64RegCache +{ +public: + ~Arm64FPRCache() {} + // Flushes the register cache in different ways depending on the mode + void Flush(FlushMode mode, PPCAnalyst::CodeOp* op = nullptr); + + // Returns a guest register inside of a host register + // Will dump an immediate to the host register as well + ARM64Reg R(u32 preg); + +protected: + // Get the order of the host registers + void GetAllocationOrder(void); + + // Our guest FPRs + // Gekko has 32 paired registers(32x2) + OpArg m_guest_registers[32][2]; +}; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp new file mode 100644 index 0000000000..7c72a0ccac --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -0,0 +1,60 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
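For reference, a composite sketch of how an instruction handler is expected to drive the register cache declared above, assembled only from calls that appear elsewhere in this patch (inst.RS as in mtmsr):

    ARM64Reg WA = gpr.GetReg();            // temporary; must be unlocked later
    MOVI2R(WA, js.compilerPC + 4);
    STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(spr[SPR_LR]));
    gpr.Unlock(WA);                        // release the temporary
    STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(msr)); // guest reg via R()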
+ +#include "Common/Arm64Emitter.h" +#include "Common/Common.h" + +#include "Core/Core.h" +#include "Core/CoreTiming.h" +#include "Core/PowerPC/PowerPC.h" +#include "Core/PowerPC/PPCTables.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitAsm.h" + +FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) +{ + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + + FixupBranch branch; + switch (bit) + { + case CR_SO_BIT: // check bit 61 set + LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field])); + branch = jump_if_set ? TBNZ(XA, 61) : TBZ(XA, 61); + break; + case CR_EQ_BIT: // check bits 31-0 == 0 + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(cr_val[field])); + branch = jump_if_set ? CBZ(WA) : CBNZ(WA); + break; + case CR_GT_BIT: // check val > 0 + LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field])); + CMP(XA, SP); + branch = B(jump_if_set ? CC_GT : CC_LE); + break; + case CR_LT_BIT: // check bit 62 set + LDR(INDEX_UNSIGNED, XA, X29, PPCSTATE_OFF(cr_val[field])); + branch = jump_if_set ? TBNZ(XA, 62) : TBZ(XA, 62); + break; + default: + _assert_msg_(DYNA_REC, false, "Invalid CR bit"); + } + + gpr.Unlock(WA); + return branch; +} + +void JitArm64::mtmsr(UGeckoInstruction inst) +{ + INSTRUCTION_START + // Don't interpret this, if we do we get thrown out + //JITDISABLE(bJITSystemRegistersOff) + + STR(INDEX_UNSIGNED, gpr.R(inst.RS), X29, PPCSTATE_OFF(msr)); + + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + + WriteExit(js.compilerPC + 4); +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp new file mode 100644 index 0000000000..d798ed5056 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -0,0 +1,493 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
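The switch in JumpIfCRFieldBit above documents how this JIT models a CR field as a 64-bit cr_val: SO is bit 61, LT is bit 62, EQ means the low 32 bits are zero, and GT means the value is positive as a signed 64-bit integer. The same predicates on a host value, as a sketch:

    bool CRFieldBitSet(u64 cr_val, int bit)
    {
        switch (bit)
        {
        case CR_SO_BIT: return (cr_val >> 61) & 1;
        case CR_EQ_BIT: return (u32)cr_val == 0;
        case CR_GT_BIT: return (s64)cr_val > 0;
        case CR_LT_BIT: return (cr_val >> 62) & 1;
        }
        return false;
    }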
+ +#include "Core/PowerPC/JitInterface.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_Tables.h" + +// Should be moved in to the Jit class +typedef void (JitArm64::*_Instruction) (UGeckoInstruction instCode); + +static _Instruction dynaOpTable[64]; +static _Instruction dynaOpTable4[1024]; +static _Instruction dynaOpTable19[1024]; +static _Instruction dynaOpTable31[1024]; +static _Instruction dynaOpTable59[32]; +static _Instruction dynaOpTable63[1024]; + +void JitArm64::DynaRunTable4(UGeckoInstruction inst) {(this->*dynaOpTable4 [inst.SUBOP10])(inst);} +void JitArm64::DynaRunTable19(UGeckoInstruction inst) {(this->*dynaOpTable19[inst.SUBOP10])(inst);} +void JitArm64::DynaRunTable31(UGeckoInstruction inst) {(this->*dynaOpTable31[inst.SUBOP10])(inst);} +void JitArm64::DynaRunTable59(UGeckoInstruction inst) {(this->*dynaOpTable59[inst.SUBOP5 ])(inst);} +void JitArm64::DynaRunTable63(UGeckoInstruction inst) {(this->*dynaOpTable63[inst.SUBOP10])(inst);} + +struct GekkoOPTemplate +{ + int opcode; + _Instruction Inst; + //GekkoOPInfo opinfo; // Doesn't need opinfo, Interpreter fills it out +}; + +static GekkoOPTemplate primarytable[] = +{ + {4, &JitArm64::DynaRunTable4}, //"RunTable4", OPTYPE_SUBTABLE | (4<<24), 0}}, + {19, &JitArm64::DynaRunTable19}, //"RunTable19", OPTYPE_SUBTABLE | (19<<24), 0}}, + {31, &JitArm64::DynaRunTable31}, //"RunTable31", OPTYPE_SUBTABLE | (31<<24), 0}}, + {59, &JitArm64::DynaRunTable59}, //"RunTable59", OPTYPE_SUBTABLE | (59<<24), 0}}, + {63, &JitArm64::DynaRunTable63}, //"RunTable63", OPTYPE_SUBTABLE | (63<<24), 0}}, + + {16, &JitArm64::bcx}, //"bcx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {18, &JitArm64::bx}, //"bx", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + + {1, &JitArm64::HLEFunction}, //"HLEFunction", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {2, &JitArm64::FallBackToInterpreter}, //"DynaBlock", OPTYPE_SYSTEM, 0}}, + {3, &JitArm64::Break}, //"twi", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {17, &JitArm64::sc}, //"sc", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, + + {7, &JitArm64::FallBackToInterpreter}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}}, + {8, &JitArm64::FallBackToInterpreter}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, + {10, &JitArm64::FallBackToInterpreter}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, + {11, &JitArm64::FallBackToInterpreter}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}}, + {12, &JitArm64::FallBackToInterpreter}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}}, + {13, &JitArm64::FallBackToInterpreter}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}}, + {14, &JitArm64::FallBackToInterpreter}, //"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, + {15, &JitArm64::FallBackToInterpreter}, //"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0}}, + + {20, &JitArm64::FallBackToInterpreter}, //"rlwimix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_A | FL_IN_S | FL_RC_BIT}}, + {21, &JitArm64::FallBackToInterpreter}, //"rlwinmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {23, &JitArm64::FallBackToInterpreter}, //"rlwnmx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_IN_B | FL_RC_BIT}}, + + {24, &JitArm64::FallBackToInterpreter}, //"ori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {25, &JitArm64::FallBackToInterpreter}, //"oris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {26, &JitArm64::FallBackToInterpreter}, //"xori", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {27, &JitArm64::FallBackToInterpreter}, //"xoris", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S}}, + {28, &JitArm64::FallBackToInterpreter}, //"andi_rc", 
OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, + {29, &JitArm64::FallBackToInterpreter}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, + + {32, &JitArm64::FallBackToInterpreter}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {33, &JitArm64::FallBackToInterpreter}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {34, &JitArm64::FallBackToInterpreter}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {35, &JitArm64::FallBackToInterpreter}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {40, &JitArm64::FallBackToInterpreter}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {41, &JitArm64::FallBackToInterpreter}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {42, &JitArm64::FallBackToInterpreter}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {43, &JitArm64::FallBackToInterpreter}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + + {44, &JitArm64::FallBackToInterpreter}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {45, &JitArm64::FallBackToInterpreter}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {36, &JitArm64::FallBackToInterpreter}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {37, &JitArm64::FallBackToInterpreter}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {38, &JitArm64::FallBackToInterpreter}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {39, &JitArm64::FallBackToInterpreter}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + + {46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, + {47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, + + {48, &JitArm64::FallBackToInterpreter}, //"lfs", OPTYPE_LOADFP, FL_IN_A}}, + {49, &JitArm64::FallBackToInterpreter}, //"lfsu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, + {50, &JitArm64::FallBackToInterpreter}, //"lfd", OPTYPE_LOADFP, FL_IN_A}}, + {51, &JitArm64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}}, + + {52, &JitArm64::FallBackToInterpreter}, //"stfs", OPTYPE_STOREFP, FL_IN_A}}, + {53, &JitArm64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, + {54, &JitArm64::FallBackToInterpreter}, //"stfd", OPTYPE_STOREFP, FL_IN_A}}, + {55, &JitArm64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}}, + + {56, &JitArm64::FallBackToInterpreter}, //"psq_l", OPTYPE_PS, FL_IN_A}}, + {57, &JitArm64::FallBackToInterpreter}, //"psq_lu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, + {60, &JitArm64::FallBackToInterpreter}, //"psq_st", OPTYPE_PS, FL_IN_A}}, + {61, &JitArm64::FallBackToInterpreter}, //"psq_stu", OPTYPE_PS, FL_OUT_A | FL_IN_A}}, + + //missing: 0, 5, 6, 9, 22, 30, 62, 58 + {0, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {5, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {6, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {9, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {22, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {30, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {62, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, + {58, &JitArm64::FallBackToInterpreter}, //"unknown_instruction", OPTYPE_UNKNOWN, 0}}, +}; + +static GekkoOPTemplate table4[] = +{ //SUBOP10 + {0, &JitArm64::FallBackToInterpreter}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, + {32, &JitArm64::FallBackToInterpreter}, //"ps_cmpo0", 
OPTYPE_PS, FL_SET_CRn}}, + {40, &JitArm64::FallBackToInterpreter}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, + {136, &JitArm64::FallBackToInterpreter}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, + {264, &JitArm64::FallBackToInterpreter}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, + {64, &JitArm64::FallBackToInterpreter}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, + {72, &JitArm64::FallBackToInterpreter}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, + {96, &JitArm64::FallBackToInterpreter}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, + {528, &JitArm64::FallBackToInterpreter}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, + {560, &JitArm64::FallBackToInterpreter}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, + {592, &JitArm64::FallBackToInterpreter}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}}, + {624, &JitArm64::FallBackToInterpreter}, //"ps_merge11", OPTYPE_PS, FL_RC_BIT}}, + + {1014, &JitArm64::FallBackToInterpreter}, //"dcbz_l", OPTYPE_SYSTEM, 0}}, +}; + +static GekkoOPTemplate table4_2[] = +{ + {10, &JitArm64::FallBackToInterpreter}, //"ps_sum0", OPTYPE_PS, 0}}, + {11, &JitArm64::FallBackToInterpreter}, //"ps_sum1", OPTYPE_PS, 0}}, + {12, &JitArm64::FallBackToInterpreter}, //"ps_muls0", OPTYPE_PS, 0}}, + {13, &JitArm64::FallBackToInterpreter}, //"ps_muls1", OPTYPE_PS, 0}}, + {14, &JitArm64::FallBackToInterpreter}, //"ps_madds0", OPTYPE_PS, 0}}, + {15, &JitArm64::FallBackToInterpreter}, //"ps_madds1", OPTYPE_PS, 0}}, + {18, &JitArm64::FallBackToInterpreter}, //"ps_div", OPTYPE_PS, 0, 16}}, + {20, &JitArm64::FallBackToInterpreter}, //"ps_sub", OPTYPE_PS, 0}}, + {21, &JitArm64::FallBackToInterpreter}, //"ps_add", OPTYPE_PS, 0}}, + {23, &JitArm64::FallBackToInterpreter}, //"ps_sel", OPTYPE_PS, 0}}, + {24, &JitArm64::FallBackToInterpreter}, //"ps_res", OPTYPE_PS, 0}}, + {25, &JitArm64::FallBackToInterpreter}, //"ps_mul", OPTYPE_PS, 0}}, + {26, &JitArm64::FallBackToInterpreter}, //"ps_rsqrte", OPTYPE_PS, 0, 1}}, + {28, &JitArm64::FallBackToInterpreter}, //"ps_msub", OPTYPE_PS, 0}}, + {29, &JitArm64::FallBackToInterpreter}, //"ps_madd", OPTYPE_PS, 0}}, + {30, &JitArm64::FallBackToInterpreter}, //"ps_nmsub", OPTYPE_PS, 0}}, + {31, &JitArm64::FallBackToInterpreter}, //"ps_nmadd", OPTYPE_PS, 0}}, +}; + + +static GekkoOPTemplate table4_3[] = +{ + {6, &JitArm64::FallBackToInterpreter}, //"psq_lx", OPTYPE_PS, 0}}, + {7, &JitArm64::FallBackToInterpreter}, //"psq_stx", OPTYPE_PS, 0}}, + {38, &JitArm64::FallBackToInterpreter}, //"psq_lux", OPTYPE_PS, 0}}, + {39, &JitArm64::FallBackToInterpreter}, //"psq_stux", OPTYPE_PS, 0}}, +}; + +static GekkoOPTemplate table19[] = +{ + {528, &JitArm64::bcctrx}, //"bcctrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, + {16, &JitArm64::bclrx}, //"bclrx", OPTYPE_BRANCH, FL_ENDBLOCK}}, + {257, &JitArm64::FallBackToInterpreter}, //"crand", OPTYPE_CR, FL_EVIL}}, + {129, &JitArm64::FallBackToInterpreter}, //"crandc", OPTYPE_CR, FL_EVIL}}, + {289, &JitArm64::FallBackToInterpreter}, //"creqv", OPTYPE_CR, FL_EVIL}}, + {225, &JitArm64::FallBackToInterpreter}, //"crnand", OPTYPE_CR, FL_EVIL}}, + {33, &JitArm64::FallBackToInterpreter}, //"crnor", OPTYPE_CR, FL_EVIL}}, + {449, &JitArm64::FallBackToInterpreter}, //"cror", OPTYPE_CR, FL_EVIL}}, + {417, &JitArm64::FallBackToInterpreter}, //"crorc", OPTYPE_CR, FL_EVIL}}, + {193, &JitArm64::FallBackToInterpreter}, //"crxor", OPTYPE_CR, FL_EVIL}}, + + {150, &JitArm64::FallBackToInterpreter}, //"isync", OPTYPE_ICACHE, FL_EVIL}}, + {0, &JitArm64::FallBackToInterpreter}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, + + {50, &JitArm64::rfi}, //"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1}}, + {18, 
&JitArm64::Break}, //"rfid", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS}} +}; + + +static GekkoOPTemplate table31[] = +{ + {28, &JitArm64::FallBackToInterpreter}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {60, &JitArm64::FallBackToInterpreter}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {444, &JitArm64::FallBackToInterpreter}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {124, &JitArm64::FallBackToInterpreter}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {316, &JitArm64::FallBackToInterpreter}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {412, &JitArm64::FallBackToInterpreter}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {476, &JitArm64::FallBackToInterpreter}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {284, &JitArm64::FallBackToInterpreter}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}}, + {0, &JitArm64::FallBackToInterpreter}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, + {32, &JitArm64::FallBackToInterpreter}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}}, + {26, &JitArm64::FallBackToInterpreter}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {922, &JitArm64::FallBackToInterpreter}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {954, &JitArm64::FallBackToInterpreter}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}}, + {536, &JitArm64::FallBackToInterpreter}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {792, &JitArm64::FallBackToInterpreter}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {824, &JitArm64::FallBackToInterpreter}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + {24, &JitArm64::FallBackToInterpreter}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}}, + + {54, &JitArm64::FallBackToInterpreter}, //"dcbst", OPTYPE_DCACHE, 0, 4}}, + {86, &JitArm64::FallBackToInterpreter}, //"dcbf", OPTYPE_DCACHE, 0, 4}}, + {246, &JitArm64::FallBackToInterpreter}, //"dcbtst", OPTYPE_DCACHE, 0, 1}}, + {278, &JitArm64::FallBackToInterpreter}, //"dcbt", OPTYPE_DCACHE, 0, 1}}, + {470, &JitArm64::FallBackToInterpreter}, //"dcbi", OPTYPE_DCACHE, 0, 4}}, + {758, &JitArm64::FallBackToInterpreter}, //"dcba", OPTYPE_DCACHE, 0, 4}}, + {1014, &JitArm64::FallBackToInterpreter}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, + + //load word + {23, &JitArm64::FallBackToInterpreter}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {55, &JitArm64::FallBackToInterpreter}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load halfword + {279, &JitArm64::FallBackToInterpreter}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {311, &JitArm64::FallBackToInterpreter}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load halfword signextend + {343, &JitArm64::FallBackToInterpreter}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {375, &JitArm64::FallBackToInterpreter}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load byte + {87, &JitArm64::FallBackToInterpreter}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {119, &JitArm64::FallBackToInterpreter}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //load byte reverse + {534, &JitArm64::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {790, &JitArm64::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, 
FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + + // Conditional load/store (Wii SMP) + {150, &JitArm64::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, + {20, &JitArm64::FallBackToInterpreter}, //"lwarx", OPTYPE_LOAD, FL_EVIL | FL_OUT_D | FL_IN_A0B | FL_SET_CR0}}, + + //load string (interpret these) + {533, &JitArm64::FallBackToInterpreter}, //"lswx", OPTYPE_LOAD, FL_EVIL | FL_IN_A | FL_OUT_D}}, + {597, &JitArm64::FallBackToInterpreter}, //"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, + + //store word + {151, &JitArm64::FallBackToInterpreter}, //"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {183, &JitArm64::FallBackToInterpreter}, //"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //store halfword + {407, &JitArm64::FallBackToInterpreter}, //"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {439, &JitArm64::FallBackToInterpreter}, //"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //store byte + {215, &JitArm64::FallBackToInterpreter}, //"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {247, &JitArm64::FallBackToInterpreter}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + + //store bytereverse + {662, &JitArm64::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {918, &JitArm64::FallBackToInterpreter}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}}, + + {661, &JitArm64::FallBackToInterpreter}, //"stswx", OPTYPE_STORE, FL_EVIL}}, + {725, &JitArm64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}}, + + // fp load/store + {535, &JitArm64::FallBackToInterpreter}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, + {567, &JitArm64::FallBackToInterpreter}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, + {599, &JitArm64::FallBackToInterpreter}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}}, + {631, &JitArm64::FallBackToInterpreter}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}}, + + {663, &JitArm64::FallBackToInterpreter}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + {695, &JitArm64::FallBackToInterpreter}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, + {727, &JitArm64::FallBackToInterpreter}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + {759, &JitArm64::FallBackToInterpreter}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}}, + {983, &JitArm64::FallBackToInterpreter}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}}, + + {19, &JitArm64::FallBackToInterpreter}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}}, + {83, &JitArm64::FallBackToInterpreter}, //"mfmsr", OPTYPE_SYSTEM, FL_OUT_D}}, + {144, &JitArm64::FallBackToInterpreter}, //"mtcrf", OPTYPE_SYSTEM, 0}}, + {146, &JitArm64::mtmsr}, //"mtmsr", OPTYPE_SYSTEM, FL_ENDBLOCK}}, + {210, &JitArm64::FallBackToInterpreter}, //"mtsr", OPTYPE_SYSTEM, 0}}, + {242, &JitArm64::FallBackToInterpreter}, //"mtsrin", OPTYPE_SYSTEM, 0}}, + {339, &JitArm64::FallBackToInterpreter}, //"mfspr", OPTYPE_SPR, FL_OUT_D}}, + {467, &JitArm64::FallBackToInterpreter}, //"mtspr", OPTYPE_SPR, 0, 2}}, + {371, &JitArm64::FallBackToInterpreter}, //"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER}}, + {512, &JitArm64::FallBackToInterpreter}, //"mcrxr", OPTYPE_SYSTEM, 0}}, + {595, &JitArm64::FallBackToInterpreter}, //"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 2}}, + {659, &JitArm64::FallBackToInterpreter}, //"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 2}}, + + {4, &JitArm64::Break}, //"tw", OPTYPE_SYSTEM, FL_ENDBLOCK, 1}}, + {598, &JitArm64::FallBackToInterpreter}, //"sync", OPTYPE_SYSTEM, 0, 2}}, + {982, &JitArm64::icbi}, //"icbi", OPTYPE_SYSTEM, FL_ENDBLOCK, 3}}, + + // Unused instructions on GC + {310, 
&JitArm64::FallBackToInterpreter}, //"eciwx", OPTYPE_INTEGER, FL_RC_BIT}},
+	{438, &JitArm64::FallBackToInterpreter}, //"ecowx", OPTYPE_INTEGER, FL_RC_BIT}},
+	{854, &JitArm64::FallBackToInterpreter}, //"eieio", OPTYPE_INTEGER, FL_RC_BIT}},
+	{306, &JitArm64::FallBackToInterpreter}, //"tlbie", OPTYPE_SYSTEM, 0}},
+	{370, &JitArm64::FallBackToInterpreter}, //"tlbia", OPTYPE_SYSTEM, 0}},
+	{566, &JitArm64::FallBackToInterpreter}, //"tlbsync", OPTYPE_SYSTEM, 0}},
+};
+
+static GekkoOPTemplate table31_2[] =
+{
+	{266, &JitArm64::FallBackToInterpreter}, //"addx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
+	{778, &JitArm64::FallBackToInterpreter}, //"addox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
+	{10, &JitArm64::FallBackToInterpreter}, //"addcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
+	{522, &JitArm64::FallBackToInterpreter}, //"addcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
+	{138, &JitArm64::FallBackToInterpreter}, //"addex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
+	{650, &JitArm64::FallBackToInterpreter}, //"addeox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
+	{234, &JitArm64::FallBackToInterpreter}, //"addmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
+	{202, &JitArm64::FallBackToInterpreter}, //"addzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
+	{491, &JitArm64::FallBackToInterpreter}, //"divwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
+	{1003, &JitArm64::FallBackToInterpreter}, //"divwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
+	{459, &JitArm64::FallBackToInterpreter}, //"divwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
+	{971, &JitArm64::FallBackToInterpreter}, //"divwuox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 39}},
+	{75, &JitArm64::FallBackToInterpreter}, //"mulhwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
+	{11, &JitArm64::FallBackToInterpreter}, //"mulhwux", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
+	{235, &JitArm64::FallBackToInterpreter}, //"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
+	{747, &JitArm64::FallBackToInterpreter}, //"mullwox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT, 4}},
+	{104, &JitArm64::FallBackToInterpreter}, //"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
+	{40, &JitArm64::FallBackToInterpreter}, //"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
+	{552, &JitArm64::FallBackToInterpreter}, //"subfox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_RC_BIT}},
+	{8, &JitArm64::FallBackToInterpreter}, //"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
+	{520, &JitArm64::FallBackToInterpreter}, //"subfcox", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_SET_CA | FL_RC_BIT}},
+	{136, &JitArm64::FallBackToInterpreter}, //"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
+	{232, &JitArm64::FallBackToInterpreter}, //"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
+	{200, &JitArm64::FallBackToInterpreter}, //"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_AB | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
+};
+
+static GekkoOPTemplate table59[] =
+{
+	{18, &JitArm64::FallBackToInterpreter}, //"fdivsx", OPTYPE_FPU, FL_RC_BIT_F, 16}},
+	{20, &JitArm64::FallBackToInterpreter}, //"fsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{21, &JitArm64::FallBackToInterpreter}, //"faddsx", OPTYPE_FPU, FL_RC_BIT_F}},
+//	{22, &JitArm64::FallBackToInterpreter}, //"fsqrtsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{24, &JitArm64::FallBackToInterpreter}, //"fresx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{25, &JitArm64::FallBackToInterpreter}, //"fmulsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{28, &JitArm64::FallBackToInterpreter}, //"fmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{29, &JitArm64::FallBackToInterpreter}, //"fmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{30, &JitArm64::FallBackToInterpreter}, //"fnmsubsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{31, &JitArm64::FallBackToInterpreter}, //"fnmaddsx", OPTYPE_FPU, FL_RC_BIT_F}},
+};
+
+static GekkoOPTemplate table63[] =
+{
+	{264, &JitArm64::FallBackToInterpreter}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{32, &JitArm64::FallBackToInterpreter}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}},
+	{0, &JitArm64::FallBackToInterpreter}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
+	{14, &JitArm64::FallBackToInterpreter}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{15, &JitArm64::FallBackToInterpreter}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{72, &JitArm64::FallBackToInterpreter}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{136, &JitArm64::FallBackToInterpreter}, //"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{40, &JitArm64::FallBackToInterpreter}, //"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{12, &JitArm64::FallBackToInterpreter}, //"frspx", OPTYPE_FPU, FL_RC_BIT_F}},
+
+	{64, &JitArm64::FallBackToInterpreter}, //"mcrfs", OPTYPE_SYSTEMFP, 0}},
+	{583, &JitArm64::FallBackToInterpreter}, //"mffsx", OPTYPE_SYSTEMFP, 0}},
+	{70, &JitArm64::FallBackToInterpreter}, //"mtfsb0x", OPTYPE_SYSTEMFP, 0, 2}},
+	{38, &JitArm64::FallBackToInterpreter}, //"mtfsb1x", OPTYPE_SYSTEMFP, 0, 2}},
+	{134, &JitArm64::FallBackToInterpreter}, //"mtfsfix", OPTYPE_SYSTEMFP, 0, 2}},
+	{711, &JitArm64::FallBackToInterpreter}, //"mtfsfx", OPTYPE_SYSTEMFP, 0, 2}},
+};
+
+static GekkoOPTemplate table63_2[] =
+{
+	{18, &JitArm64::FallBackToInterpreter}, //"fdivx", OPTYPE_FPU, FL_RC_BIT_F, 30}},
+	{20, &JitArm64::FallBackToInterpreter}, //"fsubx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{21, &JitArm64::FallBackToInterpreter}, //"faddx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{22, &JitArm64::FallBackToInterpreter}, //"fsqrtx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{23, &JitArm64::FallBackToInterpreter}, //"fselx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{25, &JitArm64::FallBackToInterpreter}, //"fmulx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{26, &JitArm64::FallBackToInterpreter}, //"frsqrtex", OPTYPE_FPU, FL_RC_BIT_F}},
+	{28, &JitArm64::FallBackToInterpreter}, //"fmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{29, &JitArm64::FallBackToInterpreter}, //"fmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{30, &JitArm64::FallBackToInterpreter}, //"fnmsubx", OPTYPE_FPU, FL_RC_BIT_F}},
+	{31, &JitArm64::FallBackToInterpreter}, //"fnmaddx", OPTYPE_FPU, FL_RC_BIT_F}},
+};
+
+
+namespace JitArm64Tables
+{
+
+void CompileInstruction(PPCAnalyst::CodeOp & op)
+{
+	JitArm64 *jitarm = (JitArm64 *)jit;
+	(jitarm->*dynaOpTable[op.inst.OPCD])(op.inst);
+	GekkoOPInfo *info = op.opinfo;
+	if (info)
+	{
+#ifdef OPLOG
+		if (!strcmp(info->opname, OP_TO_LOG)){ // "mcrfs"
+			rsplocations.push_back(jit->js.compilerPC);
+		}
+#endif
+		info->compileCount++;
+		info->lastUse = jit->js.compilerPC;
+	}
+}
+
+void InitTables()
+{
+	// once initialized, tables are read-only
+	static bool initialized = false;
+	if (initialized)
+		return;
+
+	// Clear everything to unknown_instruction before filling in the real handlers
+	for (int i = 0; i < 32; i++)
+	{
+		dynaOpTable59[i] = &JitArm64::unknown_instruction;
+	}
+
+	for (int i = 0; i < 1024; i++)
+	{
+		dynaOpTable4 [i] = &JitArm64::unknown_instruction;
+		dynaOpTable19[i] = &JitArm64::unknown_instruction;
+		dynaOpTable31[i] = &JitArm64::unknown_instruction;
+		dynaOpTable63[i] = &JitArm64::unknown_instruction;
+	}
+
+	for (int i = 0; i < (int)(sizeof(primarytable) / sizeof(GekkoOPTemplate)); i++)
+	{
+		dynaOpTable[primarytable[i].opcode] = primarytable[i].Inst;
+	}
+
+	for (int i = 0; i < 32; i++)
+	{
+		int fill = i << 5; // table4_2 opcodes are 5 bits wide; replicate across the upper bits
+		for (int j = 0; j < (int)(sizeof(table4_2) / sizeof(GekkoOPTemplate)); j++)
+		{
+			int op = fill+table4_2[j].opcode;
+			dynaOpTable4[op] = table4_2[j].Inst;
+		}
+	}
+
+	for (int i = 0; i < 16; i++)
+	{
+		int fill = i << 6; // table4_3 opcodes are 6 bits wide; replicate across the upper bits
+		for (int j = 0; j < (int)(sizeof(table4_3) / sizeof(GekkoOPTemplate)); j++)
+		{
+			int op = fill+table4_3[j].opcode;
+			dynaOpTable4[op] = table4_3[j].Inst;
+		}
+	}
+
+	for (int i = 0; i < (int)(sizeof(table4) / sizeof(GekkoOPTemplate)); i++)
+	{
+		int op = table4[i].opcode;
+		dynaOpTable4[op] = table4[i].Inst;
+	}
+
+	for (int i = 0; i < (int)(sizeof(table31) / sizeof(GekkoOPTemplate)); i++)
+	{
+		int op = table31[i].opcode;
+		dynaOpTable31[op] = table31[i].Inst;
+	}
+
+	for (int i = 0; i < 1; i++)
+	{
+		int fill = i << 9; // single iteration; the OE variants are listed explicitly in table31_2
+		for (int j = 0; j < (int)(sizeof(table31_2) / sizeof(GekkoOPTemplate)); j++)
+		{
+			int op = fill + table31_2[j].opcode;
+			dynaOpTable31[op] = table31_2[j].Inst;
+		}
+	}
+
+	for (int i = 0; i < (int)(sizeof(table19) / sizeof(GekkoOPTemplate)); i++)
+	{
+		int op = table19[i].opcode;
+		dynaOpTable19[op] = table19[i].Inst;
+	}
+
+	for (int i = 0; i < (int)(sizeof(table59) / sizeof(GekkoOPTemplate)); i++)
+	{
+		int op = table59[i].opcode;
+		dynaOpTable59[op] = table59[i].Inst;
+	}
+
+	for (int i = 0; i < (int)(sizeof(table63) / sizeof(GekkoOPTemplate)); i++)
+	{
+		int op = table63[i].opcode;
+		dynaOpTable63[op] = table63[i].Inst;
+	}
+
+	for (int i = 0; i < 32; i++)
+	{
+		int fill = i << 5; // table63_2 opcodes are 5 bits wide; replicate across the upper bits
+		for (int j = 0; j < (int)(sizeof(table63_2) / sizeof(GekkoOPTemplate)); j++)
+		{
+			int op = fill + table63_2[j].opcode;
+			dynaOpTable63[op] = table63_2[j].Inst;
+		}
+	}
+
+	initialized = true;
+
+}
+
+} // namespace JitArm64Tables
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.h
new file mode 100644
index 0000000000..d1788e404e
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.h
@@ -0,0 +1,14 @@
+// Copyright 2014 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "Core/PowerPC/Gekko.h"
+#include "Core/PowerPC/PPCTables.h"
+
+namespace JitArm64Tables
+{
+	void CompileInstruction(PPCAnalyst::CodeOp & op);
+	void InitTables();
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
new file mode 100644
index 0000000000..2b5e4a5982
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -0,0 +1,80 @@
+// Copyright 2014 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+
+#include "Common/Arm64Emitter.h"
+
+#include "Core/PowerPC/PowerPC.h"
+#include "Core/PowerPC/JitArm64/Jit.h"
+#include "Core/PowerPC/JitArm64/JitAsm.h"
+#include "Core/PowerPC/JitCommon/JitCache.h"
+
+using namespace Arm64Gen;
+
+void JitArm64AsmRoutineManager::Generate()
+{
+	enterCode = GetCodePtr();
+
+	MOVI2R(X29, (u64)&PowerPC::ppcState); // X29 holds the ppcState pointer; PPCSTATE_OFF offsets are relative to it
+
+	dispatcher = GetCodePtr();
+	printf("Dispatcher is %p\n", dispatcher);
+	// Downcount Check
+	// Whoever jumped here must have already decremented the downcount with a flag-setting instruction
+	// IMPORTANT - We jump on negative, not carry!!!
+	FixupBranch bail = B(CC_MI);
+
+	dispatcherNoCheck = GetCodePtr();
+
+	// This block of code gets the address of the compiled block of code
+	// It runs through to the compiling portion if it isn't found
+	LDR(INDEX_UNSIGNED, W28, X29, PPCSTATE_OFF(pc)); // Load the current PC into W28
+	BFM(W28, WSP, 3, 2); // Wipe the top 3 bits. Same as PC & JIT_ICACHE_MASK
+
+	MOVI2R(X27, (u64)jit->GetBlockCache()->iCache);
+	LDR(W27, X27, X28);
+
+	FixupBranch JitBlock = TBNZ(W27, 7); // Bit 7 set means this block hasn't been compiled yet
+	// Success, it is our Jitblock.
+	MOVI2R(X30, (u64)jit->GetBlockCache()->GetCodePointers());
+	UBFM(X27, X27, 61, 60); // Same as X27 << 3
+	LDR(X30, X30, X27); // Load the block's code pointer into X30
+	BR(X30);
+	// No need to jump anywhere after here, the block will go back to dispatcher start
+
+	SetJumpTarget(JitBlock);
+
+	MOVI2R(X30, (u64)&Jit);
+	BLR(X30);
+
+	B(dispatcherNoCheck);
+
+	SetJumpTarget(bail);
+	doTiming = GetCodePtr();
+	MOVI2R(X30, (u64)&CoreTiming::Advance);
+	BLR(X30);
+
+	// Does exception checking
+	LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc));
+	STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(npc));
+	MOVI2R(X30, (u64)&PowerPC::CheckExceptions);
+	BLR(X30);
+	LDR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(npc));
+	STR(INDEX_UNSIGNED, W0, X29, PPCSTATE_OFF(pc));
+
+	// Check the state pointer to see if we are exiting
+	// Gets checked on every exception check
+	MOVI2R(X0, (u64)PowerPC::GetStatePtr()); // 64bit pointer needs an X register
+	LDR(INDEX_UNSIGNED, W0, X0, 0);
+	FixupBranch Exit = CBNZ(W0);
+
+	B(dispatcher);
+
+	SetJumpTarget(Exit);
+
+	FlushIcache();
+}
+
+void JitArm64AsmRoutineManager::GenerateCommon()
+{
+}
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.h b/Source/Core/Core/PowerPC/JitArm64/JitAsm.h
new file mode 100644
index 0000000000..d2dba320c1
--- /dev/null
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.h
@@ -0,0 +1,29 @@
+// Copyright 2014 Dolphin Emulator Project
+// Licensed under GPLv2
+// Refer to the license.txt file included.
+ +#pragma once + +#include "Common/Arm64Emitter.h" +#include "Core/PowerPC/JitCommon/JitAsmCommon.h" + +class JitArm64AsmRoutineManager : public CommonAsmRoutinesBase, public Arm64Gen::ARM64CodeBlock +{ +private: + void Generate(); + void GenerateCommon(); + +public: + void Init() + { + AllocCodeSpace(8192); + Generate(); + WriteProtect(); + } + + void Shutdown() + { + FreeCodeSpace(); + } +}; + diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index e7c1d1ed2a..ea9b12be70 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -29,6 +29,11 @@ #include "Core/PowerPC/JitArm32/JitArm_Tables.h" #endif +#if _M_ARM_64 +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArm64/JitArm64_Tables.h" +#endif + static bool bFakeVMEM = false; bool bMMU = false; @@ -66,6 +71,13 @@ namespace JitInterface break; } #endif + #if _M_ARM_64 + case 4: + { + ptr = new JitArm64(); + break; + } + #endif default: { PanicAlert("Unrecognizable cpu_core: %d", core); @@ -100,6 +112,13 @@ namespace JitInterface break; } #endif + #if _M_ARM_64 + case 4: + { + JitArm64Tables::InitTables(); + break; + } + #endif default: { PanicAlert("Unrecognizable cpu_core: %d", core); diff --git a/Source/Core/Core/PowerPC/PPCAnalyst.h b/Source/Core/Core/PowerPC/PPCAnalyst.h index 0916e3951e..64d24fdcca 100644 --- a/Source/Core/Core/PowerPC/PPCAnalyst.h +++ b/Source/Core/Core/PowerPC/PPCAnalyst.h @@ -69,6 +69,11 @@ struct BlockRegStats std::min(firstRead[reg], firstWrite[reg]); } + bool IsUsed(int reg) + { + return (numReads[reg] + numWrites[reg]) > 0; + } + inline void SetInputRegister(int reg, short opindex) { if (firstRead[reg] == -1) From 5233c87dec344770ba0e4f03e765bbc808c05a21 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sat, 30 Aug 2014 17:26:47 -0500 Subject: [PATCH 3/5] Add immediate support to the GPR cache. --- Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 26dd213f56..58c21c2ae1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -209,6 +209,15 @@ public: // Will dump an immediate to the host register as well ARM64Reg R(u32 preg); + // Set a register to an immediate + void SetImmediate(u32 reg, u32 imm) { m_guest_registers[reg].LoadToImm(imm); } + + // Returns if a register is set as an immediate + bool IsImm(u32 reg) { return m_guest_registers[reg].GetType() == REG_IMM; } + + // Gets the immediate that a register is set to + u32 GetImm(u32 reg) { return m_guest_registers[reg].GetImm(); } + protected: // Get the order of the host registers void GetAllocationOrder(void); From 82dae72b63a388cff83ddb443002d65cab57f514 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sat, 30 Aug 2014 18:02:14 -0500 Subject: [PATCH 4/5] Add AArch64 target to the backpatch header. This fixes the issue of a million warnings spawning from this header. 
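
For reference, a Linux/AArch64 fault handler is expected to consume these
macros roughly as sketched below. This is a hypothetical example, not code
from this series: the handler name, the choice of register 28, and the
skip-one-instruction recovery are made up purely for illustration.

    #include <signal.h>
    #include <ucontext.h>

    static void FaultHandler(int sig, siginfo_t* info, void* raw_context)
    {
        (void)sig; (void)info;
        ucontext_t* context = (ucontext_t*)raw_context;
        // On Linux/AArch64, uc_mcontext is layout-compatible with
        // struct sigcontext, which is what SContext aliases below.
        SContext* ctx = (SContext*)&context->uc_mcontext;

        u64 fault_pc = ctx->CTX_PC;      // address of the faulting load/store
        u64 base_reg = ctx->CTX_REG(28); // hypothetical JIT memory base register
        (void)fault_pc; (void)base_reg;

        ctx->CTX_PC += 4;                // hypothetical recovery: resume at the
                                         // next instruction
    }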
---
 Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h
index 3a573f7531..26a916ac65 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.h
@@ -83,6 +83,11 @@ const int BACKPATCH_SIZE = 5;
 	#define CTX_R14 gregs[REG_R14]
 	#define CTX_R15 gregs[REG_R15]
 	#define CTX_RIP gregs[REG_RIP]
+	#elif _M_ARM_64
+	typedef struct sigcontext SContext;
+	#define CTX_REG(x) regs[x]
+	#define CTX_SP sp
+	#define CTX_PC pc
 	#elif _M_ARM_32
 	// Add others if required.
 	typedef struct sigcontext SContext;

From cdd2bd6cdc7802c907dda620ff265dd8dde69cdc Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Tue, 2 Sep 2014 18:37:01 -0500
Subject: [PATCH 5/5] Add a couple of asserts to the Arm64 JIT to make sure we
 can load everything.

---
 Source/Core/Core/PowerPC/JitArm64/Jit.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index a1ff3e525d..ffa7a060f6 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -14,6 +14,11 @@
 #include "Core/PowerPC/JitCommon/JitBase.h"
 
 #define PPCSTATE_OFF(elem) ((s64)&PowerPC::ppcState.elem - (s64)&PowerPC::ppcState)
+
+// Some asserts to make sure we will be able to load everything
+static_assert(PPCSTATE_OFF(spr[1023]) <= 16380, "LDR(32bit) can't reach the last SPR");
+static_assert((PPCSTATE_OFF(ps[0][0]) % 8) == 0, "LDR(64bit FP) requires FPRs to be 8-byte aligned");
+
 using namespace Arm64Gen;
 
 class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock
 {