From f1d727cf1376efebf434abcaa31c63abd2894dad Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Wed, 27 Feb 2013 11:53:24 -0600
Subject: [PATCH] Merge latest ArmEmitter changes from ppsspp while we're at it.

---
 Source/Core/Common/Src/ArmEmitter.cpp | 137 +++++++++++++++++++++-----
 Source/Core/Common/Src/ArmEmitter.h   |  16 +--
 2 files changed, 123 insertions(+), 30 deletions(-)

diff --git a/Source/Core/Common/Src/ArmEmitter.cpp b/Source/Core/Common/Src/ArmEmitter.cpp
index f24ffbb07f..3b3bf7c550 100644
--- a/Source/Core/Common/Src/ArmEmitter.cpp
+++ b/Source/Core/Common/Src/ArmEmitter.cpp
@@ -22,11 +22,16 @@
 #include <assert.h>
 #include <stdarg.h>
 
-// For cache flushing on Symbian/Blackberry
+// For cache flushing on Symbian/iOS/Blackberry
 #ifdef __SYMBIAN32__
 #include <e32std.h>
 #endif
 
+#ifdef IOS
+#include <libkern/OSCacheControl.h>
+#include <sys/mman.h>
+#endif
+
 #ifdef BLACKBERRY
 #include <sys/mman.h>
 #endif
@@ -185,18 +190,16 @@ void ARMXEmitter::FlushIcache()
 void ARMXEmitter::FlushIcacheSection(u8 *start, u8 *end)
 {
 #ifdef __SYMBIAN32__
-	User::IMB_Range( start, end);
+	User::IMB_Range(start, end);
 #elif defined(BLACKBERRY)
 	msync(start, end - start, MS_SYNC | MS_INVALIDATE_ICACHE);
-#else
-#ifndef _WIN32
-#ifdef ANDROID
-	__builtin___clear_cache (start, end);
-#else
-	// If on Linux, we HAVE to clear from start addr or else everything gets /really/ unstable
-	__builtin___clear_cache (startcode, end);
-#endif
+#elif defined(IOS)
+	sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
+#elif !defined(_WIN32)
+#ifndef ANDROID
+	start = startcode;
 #endif
+	__builtin___clear_cache(start, end);
 #endif
 }
 
@@ -493,6 +496,7 @@ void ARMXEmitter::SMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
 {
 	Write4OpMultiply(0xC, destLo, destHi, rn, rm);
 }
+
 void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2)
 {
 	Write32(condition | (0x6AF << 16) | (dest << 12) | (7 << 4) | op2);
@@ -528,9 +532,10 @@ void ARMXEmitter::MRS (ARMReg dest)
 {
 	Write32(condition | (16 << 20) | (15 << 16) | (dest << 12));
 }
+
 void ARMXEmitter::WriteStoreOp(u32 op, ARMReg dest, ARMReg src, Operand2 op2)
 {
-	if (op2.GetData() == 0) // Don't index
+	if (op2.GetData() == 0) // set the preindex bit, but not the W bit!
 		Write32(condition | 0x01800000 | (op << 20) | (dest << 16) | (src << 12) | op2.Imm12());
 	else
 		Write32(condition | (op << 20) | (3 << 23) | (dest << 16) | (src << 12) | op2.Imm12());
@@ -620,6 +625,7 @@ ARMReg ARMXEmitter::SubBase(ARMReg Reg)
 	}
 	return Reg;
 }
+
 // NEON Specific
 void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
 {
@@ -627,7 +633,7 @@ void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
 	_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VADD(integer) when CPU doesn't support it");
 
 	bool register_quad = Vd >= Q0;
-	
+
 	// Gets encoded as a double register
 	Vd = SubBase(Vd);
 	Vn = SubBase(Vn);
@@ -651,16 +657,20 @@ void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
 	Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
 		| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (1 << 6) \
 		| ((Vm & 0x10) << 2) | (Vm & 0xF));
-	
 }
 
 // VFP Specific
-void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, u16 op)
+void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, u16 offset)
 {
 	_assert_msg_(DYNA_REC, Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR");
 	_assert_msg_(DYNA_REC, Base <= R15, "Passed invalid Base register to VLDR");
-	_assert_msg_(DYNA_REC, !(op & 3), "Offset needs to be word aligned");
+	_assert_msg_(DYNA_REC, (offset & 0xC03) == 0, "VLDR: Offset needs to be word aligned and small enough");
+
+	if (offset & 0xC03) {
+		ERROR_LOG(DYNA_REC, "VLDR: Bad offset %08x", offset);
+	}
+
 	bool single_reg = Dest < D0;
 
 	Dest = SubBase(Dest);
@@ -668,20 +678,25 @@ void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, u16 op)
 	if (single_reg)
 	{
 		Write32(NO_COND | (0x1B << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \
-			| ((Dest & 0x1E) << 11) | (10 << 8) | (op >> 2));
+			| ((Dest & 0x1E) << 11) | (10 << 8) | (offset >> 2));
 	}
 	else
 	{
 		Write32(NO_COND | (0x1B << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \
-			| ((Dest & 0xF) << 12) | (11 << 8) | (op >> 2));
+			| ((Dest & 0xF) << 12) | (11 << 8) | (offset >> 2));
 	}
 }
-void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, u16 op)
+void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, u16 offset)
 {
 	_assert_msg_(DYNA_REC, Src >= S0 && Src <= D31, "Passed invalid src register to VSTR");
 	_assert_msg_(DYNA_REC, Base <= R15, "Passed invalid base register to VSTR");
-	_assert_msg_(DYNA_REC, !(op & 3), "Offset needs to be word aligned");
+	_assert_msg_(DYNA_REC, (offset & 0xC03) == 0, "VSTR: Offset needs to be word aligned and small enough");
+
+	if (offset & 0xC03) {
+		ERROR_LOG(DYNA_REC, "VSTR: Bad offset %08x", offset);
+	}
+
 	bool single_reg = Src < D0;
 
 	Src = SubBase(Src);
@@ -689,13 +704,13 @@ void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, u16 op)
 	if (single_reg)
 	{
 		Write32(NO_COND | (0x1B << 23) | ((Src & 0x1) << 22) | (Base << 16) \
-			| ((Src & 0x1E) << 11) | (10 << 8) | (op >> 2));
+			| ((Src & 0x1E) << 11) | (10 << 8) | (offset >> 2));
 	}
 	else
 	{
 		Write32(NO_COND | (0x1B << 23) | ((Src & 0x10) << 18) | (Base << 16) \
-			| ((Src & 0xF) << 12) | (11 << 8) | (op >> 2));
+			| ((Src & 0xF) << 12) | (11 << 8) | (offset >> 2));
 	}
 }
 void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm)
@@ -779,12 +794,14 @@ void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm)
 			| ((Vd & 0xF) << 12) | (0x2F << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF));
 	}
 }
+
 // VFP and ASIMD
 void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm)
 {
 	_assert_msg_(DYNA_REC, Vd < Q0, "VABS doesn't currently support Quad reg");
 	_assert_msg_(DYNA_REC, Vd >= S0, "VABS doesn't support ARM Regs");
 	bool single_reg = Vd < D0;
+	bool double_reg = Vd < Q0;
 
 	Vd = SubBase(Vd);
 	Vm = SubBase(Vm);
 
@@ -796,10 +813,19 @@ void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm)
 	}
 	else
 	{
-		Write32(NO_COND | (0xEB << 20) | ((Vd & 0x10) << 18) | ((Vd & 0xF) << 12) \
-			| (0xBC << 4) | ((Vm & 0x10) << 1) | (Vm & 0xF));
+		if (double_reg)
+		{
+			Write32(NO_COND | (0xEB << 20) | ((Vd & 0x10) << 18) | ((Vd & 0xF) << 12) \
+				| (0xBC << 4) | ((Vm & 0x10) << 1) | (Vm & 0xF));
+		}
+		else
+		{
+			_assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VABS with Quad Reg without support!");
+			// XXX: TODO
+		}
 	}
 }
+
 void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm)
 {
 	_assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VADD");
@@ -870,6 +896,70 @@ void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm)
 		}
 	}
 }
+void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VMUL");
+	_assert_msg_(DYNA_REC, Vn >= S0, "Passed invalid Vn to VMUL");
+	_assert_msg_(DYNA_REC, Vm >= S0, "Passed invalid Vm to VMUL");
+	bool single_reg = Vd < D0;
+	bool double_reg = Vd < Q0;
+
+	Vd = SubBase(Vd);
+	Vn = SubBase(Vn);
+	Vm = SubBase(Vm);
+
+	if (single_reg)
+	{
+		Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x2 << 20) \
+			| ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \
+			| ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1));
+	}
+	else
+	{
+		if (double_reg)
+		{
+			Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x2 << 20) \
+				| ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \
+				| ((Vn & 0x10) << 3) | ((Vm & 0x10) << 2) | (Vm & 0xF));
+		}
+		else
+		{
+			_assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VMUL with Quad Reg without support!");
+			// XXX: TODO
+		}
+	}
+}
+
+void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm)
+{
+	_assert_msg_(DYNA_REC, Vd < Q0, "VNEG doesn't currently support Quad reg");
+	_assert_msg_(DYNA_REC, Vd >= S0, "VNEG doesn't support ARM Regs");
+	bool single_reg = Vd < D0;
+	bool double_reg = Vd < Q0;
+
+	Vd = SubBase(Vd);
+	Vm = SubBase(Vm);
+
+	if (single_reg)
+	{
+		Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \
+			| ((Vd & 0x1E) << 11) | (0x29 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1));
+	}
+	else
+	{
+		if (double_reg)
+		{
+			Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \
+				| ((Vd & 0xF) << 12) | (0x2D << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF));
+		}
+		else
+		{
+			_assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VNEG with Quad Reg without support!");
+			// XXX: TODO
+
+		}
+	}
+}
 
 void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high)
 {
@@ -881,6 +971,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high)
 	Write32(NO_COND | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \
 		| (11 << 8) | ((Dest & 0x10) << 3) | (1 << 4));
 }
+
 void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)
 {
 	if (Dest > R15)
diff --git a/Source/Core/Common/Src/ArmEmitter.h b/Source/Core/Common/Src/ArmEmitter.h
index 13635867a9..a3ff7d25ae 100644
--- a/Source/Core/Common/Src/ArmEmitter.h
+++ b/Source/Core/Common/Src/ArmEmitter.h
@@ -227,7 +227,7 @@ public:
 	}
 	const u32 IMMSR() // IMM shifted register
 	{
-		_assert_msg_(DYNA_REC, Type = TYPE_IMMSREG, "IMMSR must be imm shifted register");
+		_assert_msg_(DYNA_REC, Type == TYPE_IMMSREG, "IMMSR must be imm shifted register");
 		return ((IndexOrShift & 0x1f) << 7 | (Shift << 5) | Value);
 	}
 	const u32 RSR() // Register shifted register
@@ -343,8 +343,9 @@ private:
 	void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2);
 	void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2);
+
 	void Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
-	
+
 	// New Ops
 	void WriteInstruction(u32 op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags = false);
@@ -452,7 +453,7 @@ public:
 
 	void MUL (ARMReg dest, ARMReg src, ARMReg op2);
 	void MULS(ARMReg dest, ARMReg src, ARMReg op2);
-	
+
 	void UMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
 	void SMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
@@ -498,8 +499,8 @@ public:
 	void VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
 
 	// VFP Only
-	void VLDR(ARMReg Dest, ARMReg Base, u16 op);
-	void VSTR(ARMReg Src, ARMReg Base, u16 op);
+	void VLDR(ARMReg Dest, ARMReg Base, u16 offset);
+	void VSTR(ARMReg Src, ARMReg Base, u16 offset);
 	void VCMP(ARMReg Vd, ARMReg Vm);
 	// Compares against zero
 	void VCMP(ARMReg Vd);
@@ -507,10 +508,11 @@ public:
 	void VSQRT(ARMReg Vd, ARMReg Vm);
 
 	// NEON and VFP
-	void VABS(ARMReg Vd, ARMReg Vm);
 	void VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm);
 	void VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm);
-	
+	void VABS(ARMReg Vd, ARMReg Vm);
+	void VNEG(ARMReg Vd, ARMReg Vm);
+	void VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
 	void VMOV(ARMReg Dest, ARMReg Src, bool high);
 	void VMOV(ARMReg Dest, ARMReg Src);
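
A note on the new VLDR/VSTR offset asserts: the VFP load/store encoding only has room for an 8-bit word count (the emitter writes "offset >> 2" into the low byte of the instruction), so a legal immediate must be word-aligned and below 1024 bytes. The 0xC03 mask catches bits 0-1 (alignment) and bits 10-11 (range), but bits 12-15 of the u16 still slip past it. A minimal standalone sketch of the full constraint; the helper name is made up for illustration and is not part of the patch:

#include <cassert>
#include <cstdint>

// Hypothetical helper (not in the patch): the complete encodability rule
// that the new (offset & 0xC03) assert approximates.
static bool IsEncodableVfpOffset(uint16_t offset)
{
	// Word-aligned, and (offset >> 2) must fit in the 8-bit immediate field.
	return (offset & 3) == 0 && (offset >> 2) <= 0xFF;
}

int main()
{
	assert(IsEncodableVfpOffset(0x3FC));    // 1020, the largest encodable offset
	assert(!IsEncodableVfpOffset(0x401));   // unaligned and out of range
	assert(!IsEncodableVfpOffset(0x1000));  // aligned, but escapes the 0xC03 mask
	return 0;
}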
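
For context, a sketch of how the newly added VMUL/VNEG/VABS might be driven from JIT code. The buffer handling here is an assumption for illustration: the ARMXEmitter constructor taking a code pointer and GetWritableCodePtr() are assumed from the emitter's existing interface, and allocating writable+executable memory is the caller's job. FlushIcacheSection() is the routine this patch reworks for iOS above.

#include "ArmEmitter.h"

using namespace ArmGen;

// Sketch only, assuming "buf" points at writable+executable memory.
void EmitNegatedProduct(u8 *buf)
{
	ARMXEmitter emit(buf);

	emit.VMUL(D2, D0, D1);  // d2 = d0 * d1 (double-precision encoding)
	emit.VNEG(D2, D2);      // d2 = -d2
	emit.VMUL(S3, S0, S1);  // same op through the single-precision path
	emit.VABS(S3, S3);      // s3 = |s3|

	// Make sure the CPU's instruction cache sees the fresh code before
	// anything jumps to it.
	emit.FlushIcacheSection(buf, emit.GetWritableCodePtr());
}

All three new ops share the single/double/quad dispatch already used by VADD and VSUB, with the quad path still stubbed out behind the bNEON assert (the "XXX: TODO" branches).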