diff --git a/Source/Core/Common/Src/x64ABI.cpp b/Source/Core/Common/Src/x64ABI.cpp index c47ea02fb4..1b4fa0b5ff 100644 --- a/Source/Core/Common/Src/x64ABI.cpp +++ b/Source/Core/Common/Src/x64ABI.cpp @@ -10,38 +10,51 @@ using namespace Gen; // Shared code between Win64 and Unix64 -// Sets up a __cdecl function. -void XEmitter::ABI_EmitPrologue(int maxCallParams) -{ -#ifdef _M_IX86 - // Don't really need to do anything -#elif defined(_M_X64) -#if _WIN32 - int stacksize = ((maxCallParams + 1) & ~1) * 8 + 8; - // Set up a stack frame so that we can call functions - // TODO: use maxCallParams - SUB(64, R(RSP), Imm8(stacksize)); -#endif +unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog) { + // On platforms other than Windows 32-bit: At the beginning of a function, + // the stack pointer is 4/8 bytes less than a multiple of 16; however, the + // function prolog immediately subtracts an appropriate amount to align + // it, so no alignment is required around a call. + // In the functions generated by ThunkManager::ProtectFunction and some + // others, we add the necessary subtraction (and 0x20 bytes shadow space + // for Win64) into this rather than having a separate prolog. + // On Windows 32-bit, the required alignment is only 4 bytes, so we just + // ensure that the frame size isn't misaligned. +#ifdef _M_X64 + // expect frameSize == 0 + frameSize = noProlog ? 0x28 : 0; +#elif defined(_WIN32) + frameSize = (frameSize + 3) & -4; #else -#error Arch not supported + unsigned int existingAlignment = noProlog ? 
0xc : 0; + frameSize -= existingAlignment; + frameSize = (frameSize + 15) & -16; + frameSize += existingAlignment; #endif + return frameSize; } -void XEmitter::ABI_EmitEpilogue(int maxCallParams) -{ -#ifdef _M_IX86 - RET(); -#elif defined(_M_X64) -#ifdef _WIN32 - int stacksize = ((maxCallParams+1)&~1)*8 + 8; - ADD(64, R(RSP), Imm8(stacksize)); -#endif - RET(); +void XEmitter::ABI_AlignStack(unsigned int frameSize, bool noProlog) { + unsigned int fillSize = + ABI_GetAlignedFrameSize(frameSize, noProlog) - frameSize; + if (fillSize != 0) { +#ifdef _M_X64 + SUB(64, R(RSP), Imm8(fillSize)); #else -#error Arch not supported - - + SUB(32, R(ESP), Imm8(fillSize)); #endif + } +} + +void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog) { + unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize, noProlog); + if (alignedSize != 0) { +#ifdef _M_X64 + ADD(64, R(RSP), Imm8(alignedSize)); +#else + ADD(32, R(ESP), Imm8(alignedSize)); +#endif + } } #ifdef _M_IX86 // All32 @@ -65,7 +78,7 @@ void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { PUSH(16, Imm16(param2)); PUSH(32, Imm32(param1)); CALL(func); - ABI_RestoreStack(1 * 2 + 1 * 4); + ABI_RestoreStack(1 * 2 + 1 * 4); /* pop the pushed arguments (and any alignment padding) after the call */ } void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { @@ -129,13 +142,13 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { } // Pass two registers as parameters. 
-void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2) +void XEmitter::ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2, bool noProlog) { - ABI_AlignStack(2 * 4); + ABI_AlignStack(2 * 4, noProlog); PUSH(32, R(reg2)); PUSH(32, R(reg1)); CALL(func); - ABI_RestoreStack(2 * 4); + ABI_RestoreStack(2 * 4, noProlog); } void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) @@ -156,60 +169,27 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) } void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { - // Note: 4 * 4 = 16 bytes, so alignment is preserved. PUSH(EBP); + MOV(32, R(EBP), R(ESP)); PUSH(EBX); PUSH(ESI); PUSH(EDI); + SUB(32, R(ESP), Imm8(0xc)); } void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { + ADD(32, R(ESP), Imm8(0xc)); POP(EDI); POP(ESI); POP(EBX); POP(EBP); } -unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { - frameSize += 4; // reserve space for return address - unsigned int alignedSize = -#ifdef __GNUC__ - (frameSize + 15) & -16; -#else - (frameSize + 3) & -4; -#endif - return alignedSize; -} - - -void XEmitter::ABI_AlignStack(unsigned int frameSize) { -// Mac OS X requires the stack to be 16-byte aligned before every call. -// Linux requires the stack to be 16-byte aligned before calls that put SSE -// vectors on the stack, but since we do not keep track of which calls do that, -// it is effectively every call as well. -// Windows binaries compiled with MSVC do not have such a restriction*, but I -// expect that GCC on Windows acts the same as GCC on Linux in this respect. -// It would be nice if someone could verify this. -// *However, the MSVC optimizing compiler assumes a 4-byte-aligned stack at times. 
- unsigned int fillSize = - ABI_GetAlignedFrameSize(frameSize) - (frameSize + 4); - if (fillSize != 0) { - SUB(32, R(ESP), Imm8(fillSize)); - } -} - -void XEmitter::ABI_RestoreStack(unsigned int frameSize) { - unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize); - alignedSize -= 4; // return address is POPped at end of call - if (alignedSize != 0) { - ADD(32, R(ESP), Imm8(alignedSize)); - } -} - #else //64bit // Common functions void XEmitter::ABI_CallFunction(void *func) { + ABI_AlignStack(0); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { @@ -219,9 +199,11 @@ void XEmitter::ABI_CallFunction(void *func) { } else { CALL(func); } + ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionC16(void *func, u16 param1) { + ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL @@ -232,9 +214,11 @@ void XEmitter::ABI_CallFunctionC16(void *func, u16 param1) { } else { CALL(func); } + ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { + ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -246,9 +230,11 @@ void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { } else { CALL(func); } + ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { + ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL @@ -259,9 +245,11 @@ void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { } else { CALL(func); } + ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) { + ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); u64 distance = u64(func) - (u64(code) + 5); @@ 
-273,9 +261,11 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) { } else { CALL(func); } + ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) { + ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -288,9 +278,11 @@ void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param } else { CALL(func); } + ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3) { + ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(64, R(ABI_PARAM3), Imm64((u64)param3)); @@ -303,9 +295,11 @@ void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *par } else { CALL(func); } + ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 param3, void *param4) { + ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -319,9 +313,11 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 para } else { CALL(func); } + ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 param3) { + ABI_AlignStack(0); MOV(64, R(ABI_PARAM1), Imm64((u64)param1)); MOV(64, R(ABI_PARAM2), Imm64((u64)param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -334,10 +330,12 @@ void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 p } else { CALL(func); } + ABI_RestoreStack(0); } // Pass a register as a parameter. 
void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { + ABI_AlignStack(0); if (reg1 != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R(reg1)); u64 distance = u64(func) - (u64(code) + 5); @@ -349,10 +347,12 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { } else { CALL(func); } + ABI_RestoreStack(0); } // Pass two registers as parameters. -void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) { +void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog) { + ABI_AlignStack(0, noProlog); if (reg2 != ABI_PARAM1) { if (reg1 != ABI_PARAM1) MOV(64, R(ABI_PARAM1), R(reg1)); @@ -373,10 +373,12 @@ void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) { } else { CALL(func); } + ABI_RestoreStack(0, noProlog); } void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) { + ABI_AlignStack(0); if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); MOV(32, R(ABI_PARAM2), Imm32(param2)); @@ -389,10 +391,12 @@ void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2 } else { CALL(func); } + ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) { + ABI_AlignStack(0); if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); u64 distance = u64(func) - (u64(code) + 5); @@ -404,42 +408,9 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) } else { CALL(func); } -} - -unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize) { - return frameSize; -} - -#ifdef _WIN32 - -// Win64 Specific Code -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { - //we only want to do this once - PUSH(RBX); - PUSH(RSI); - PUSH(RDI); - PUSH(RBP); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - //TODO: Also preserve XMM0-3? 
- ABI_AlignStack(0); -} - -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { ABI_RestoreStack(0); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RBP); - POP(RDI); - POP(RSI); - POP(RBX); } -// Win64 Specific Code void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { PUSH(RCX); PUSH(RDX); @@ -449,12 +420,11 @@ void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { PUSH(R9); PUSH(R10); PUSH(R11); - //TODO: Also preserve XMM0-15? - ABI_AlignStack(0); + PUSH(R11); } void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { - ABI_RestoreStack(0); + POP(R11); POP(R11); POP(R10); POP(R9); @@ -465,66 +435,59 @@ void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { POP(RCX); } -void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { + +#ifdef _WIN32 +// Win64 Specific Code + +void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { + //we only want to do this once + PUSH(RBP); + MOV(64, R(RBP), R(RSP)); + PUSH(RBX); + PUSH(RSI); + PUSH(RDI); + PUSH(R12); + PUSH(R13); + PUSH(R14); + PUSH(R15); SUB(64, R(RSP), Imm8(0x28)); + //TODO: Also preserve XMM0-3? } -void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { +void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { ADD(64, R(RSP), Imm8(0x28)); + POP(R15); + POP(R14); + POP(R13); + POP(R12); + POP(RDI); + POP(RSI); + POP(RBX); + POP(RBP); } #else // Unix64 Specific Code + void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() { - PUSH(RBX); PUSH(RBP); + MOV(64, R(RBP), R(RSP)); + PUSH(RBX); PUSH(R12); PUSH(R13); PUSH(R14); PUSH(R15); - PUSH(R15); //just to align stack. duped push/pop doesn't hurt. 
+ SUB(64, R(RSP), Imm8(8)); } void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() { - POP(R15); + ADD(64, R(RSP), Imm8(8)); POP(R15); POP(R14); POP(R13); POP(R12); - POP(RBP); POP(RBX); -} - -void XEmitter::ABI_PushAllCallerSavedRegsAndAdjustStack() { - PUSH(RCX); - PUSH(RDX); - PUSH(RSI); - PUSH(RDI); - PUSH(R8); - PUSH(R9); - PUSH(R10); - PUSH(R11); - PUSH(R11); -} - -void XEmitter::ABI_PopAllCallerSavedRegsAndAdjustStack() { - POP(R11); - POP(R11); - POP(R10); - POP(R9); - POP(R8); - POP(RDI); - POP(RSI); - POP(RDX); - POP(RCX); -} - -void XEmitter::ABI_AlignStack(unsigned int /*frameSize*/) { - SUB(64, R(RSP), Imm8(0x08)); -} - -void XEmitter::ABI_RestoreStack(unsigned int /*frameSize*/) { - ADD(64, R(RSP), Imm8(0x08)); + POP(RBP); } #endif // WIN32 diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index b068579a3a..27a735ab90 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -639,7 +639,7 @@ public: // Pass a register as a parameter. void ABI_CallFunctionR(void *func, Gen::X64Reg reg1); - void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2); + void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2, bool noProlog = false); // A function that doesn't have any control over what it will do to regs, // such as the dispatcher, should be surrounded by these. @@ -652,14 +652,9 @@ public: void ABI_PushAllCallerSavedRegsAndAdjustStack(); void ABI_PopAllCallerSavedRegsAndAdjustStack(); - unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize); - void ABI_AlignStack(unsigned int frameSize); - void ABI_RestoreStack(unsigned int frameSize); - - // Sets up a __cdecl function. - // Only x64 really needs the parameter count. 
- void ABI_EmitPrologue(int maxCallParams); - void ABI_EmitEpilogue(int maxCallParams); + unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false); + void ABI_AlignStack(unsigned int frameSize, bool noProlog = false); + void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false); #ifdef _M_IX86 inline int ABI_GetNumXMMRegs() { return 8; } diff --git a/Source/Core/Common/Src/x64Thunk.cpp b/Source/Core/Common/Src/x64Thunk.cpp index efb876ce81..a9c19060de 100644 --- a/Source/Core/Common/Src/x64Thunk.cpp +++ b/Source/Core/Common/Src/x64Thunk.cpp @@ -91,35 +91,27 @@ void *ThunkManager::ProtectFunction(void *function, int num_params) PanicAlert("Trying to protect functions before the emu is started. Bad bad bad."); const u8 *call_point = GetCodePtr(); - // Make sure to align stack. #ifdef _M_X64 -#ifdef _WIN32 - SUB(64, R(ESP), Imm8(0x28)); -#else - SUB(64, R(ESP), Imm8(0x8)); -#endif + // Make sure to align stack. + ABI_AlignStack(0, true); CALL((void*)save_regs); CALL((void*)function); CALL((void*)load_regs); -#ifdef _WIN32 - ADD(64, R(ESP), Imm8(0x28)); -#else - ADD(64, R(ESP), Imm8(0x8)); -#endif + ABI_RestoreStack(0, true); RET(); #else CALL((void*)save_regs); // Since parameters are in the previous stack frame, not in registers, this takes some // trickery : we simply re-push the parameters. might not be optimal, but that doesn't really // matter. 
- ABI_AlignStack(num_params * 4); + ABI_AlignStack(num_params * 4, true); - unsigned int alignedSize = ABI_GetAlignedFrameSize(num_params * 4); + unsigned int alignedSize = ABI_GetAlignedFrameSize(num_params * 4, true); /* must use the same noProlog as ABI_AlignStack/ABI_RestoreStack here */ for (int i = 0; i < num_params; i++) { // ESP is changing, so we do not need i - PUSH(32, MDisp(ESP, alignedSize - 4)); + PUSH(32, MDisp(ESP, alignedSize)); } CALL(function); - ABI_RestoreStack(num_params * 4); + ABI_RestoreStack(num_params * 4, true); CALL((void*)load_regs); RET(); #endif diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp index 5187ef1478..d80b0bb3f8 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/JitAsmCommon.cpp @@ -167,7 +167,7 @@ void CommonAsmRoutines::GenQuantizedStores() { MOV(64, MComplex(RBX, RCX, SCALE_1, 0), R(RAX)); FixupBranch skip_complex = J(); SetJumpTarget(too_complex); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX); + ABI_CallFunctionRR(thunks.ProtectFunction((void *)&WriteDual32, 2), RAX, RCX, /* noProlog = */ true); SetJumpTarget(skip_complex); RET(); #else @@ -184,10 +184,10 @@ void CommonAsmRoutines::GenQuantizedStores() { FixupBranch arg2 = J(); SetJumpTarget(argh); MOV(32, R(EAX), M(((char*)&psTemp))); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX); + ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true); MOV(32, R(EAX), M(((char*)&psTemp)+4)); ADD(32, R(ECX), Imm32(4)); - ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX); + ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), EAX, ECX, /* noProlog = */ true); SetJumpTarget(arg2); RET(); #endif @@ -206,7 +206,7 @@ void CommonAsmRoutines::GenQuantizedStores() { PACKSSDW(XMM0, R(XMM0)); PACKUSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, false); + SafeWriteRegToReg(AX, ECX, 16, 0, false, 
true); RET(); @@ -225,7 +225,7 @@ void CommonAsmRoutines::GenQuantizedStores() { PACKSSWB(XMM0, R(XMM0)); MOVD_xmm(R(EAX), XMM0); - SafeWriteRegToReg(AX, ECX, 16, 0, false); + SafeWriteRegToReg(AX, ECX, 16, 0, false, true); RET(); @@ -251,7 +251,7 @@ void CommonAsmRoutines::GenQuantizedStores() { MOV(16, R(AX), M((char*)psTemp + 4)); BSWAP(32, EAX); - SafeWriteRegToReg(EAX, ECX, 32, 0, false); + SafeWriteRegToReg(EAX, ECX, 32, 0, false, true); RET(); @@ -271,7 +271,7 @@ void CommonAsmRoutines::GenQuantizedStores() { MOVD_xmm(R(EAX), XMM0); BSWAP(32, EAX); ROL(32, R(EAX), Imm8(16)); - SafeWriteRegToReg(EAX, ECX, 32, 0, false); + SafeWriteRegToReg(EAX, ECX, 32, 0, false, true); RET(); @@ -303,11 +303,11 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { // TODO: SafeWriteFloat MOVSS(M(&psTemp[0]), XMM0); MOV(32, R(EAX), M(&psTemp[0])); - SafeWriteRegToReg(EAX, ECX, 32, 0, false); + SafeWriteRegToReg(EAX, ECX, 32, 0, false, true); } else { MOVSS(M(&psTemp[0]), XMM0); MOV(32, R(EAX), M(&psTemp[0])); - SafeWriteRegToReg(EAX, ECX, 32, 0, true); + SafeWriteRegToReg(EAX, ECX, 32, 0, true, true); }*/ const u8* storeSingleU8 = AlignCode4(); // Used by MKWii @@ -318,7 +318,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_255)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, true); + SafeWriteRegToReg(AL, ECX, 8, 0, true, true); RET(); const u8* storeSingleS8 = AlignCode4(); @@ -328,7 +328,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, M((void *)&m_m128)); MINSS(XMM0, M((void *)&m_127)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(AL, ECX, 8, 0, true); + SafeWriteRegToReg(AL, ECX, 8, 0, true, true); RET(); const u8* storeSingleU16 = AlignCode4(); // Used by MKWii @@ -339,7 +339,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, R(XMM1)); MINSS(XMM0, M((void *)&m_65535)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, true); + 
SafeWriteRegToReg(EAX, ECX, 16, 0, true, true); RET(); const u8* storeSingleS16 = AlignCode4(); @@ -349,7 +349,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores() { MAXSS(XMM0, M((void *)&m_m32768)); MINSS(XMM0, M((void *)&m_32767)); CVTTSS2SI(EAX, R(XMM0)); - SafeWriteRegToReg(EAX, ECX, 16, 0, true); + SafeWriteRegToReg(EAX, ECX, 16, 0, true, true); RET(); singleStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp index 18c8044385..637dff171b 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.cpp @@ -223,7 +223,7 @@ void EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int ac } // Destroys both arg registers -void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap) +void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset, bool swap, bool noProlog) { if (offset) ADD(32, R(reg_addr), Imm32((u32)offset)); @@ -247,9 +247,9 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write switch (accessSize) { - case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr); break; - case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr); break; - case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr); break; + case 32: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? 
((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), 2), reg_value, reg_addr, noProlog); break; + case 16: ABI_CallFunctionRR(thunks.ProtectFunction(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), 2), reg_value, reg_addr, noProlog); break; + case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr, noProlog); break; } FixupBranch exit = J(); SetJumpTarget(fast); diff --git a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h index 7708ab8598..57dce19790 100644 --- a/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/Src/PowerPC/JitCommon/Jit_Util.h @@ -16,7 +16,7 @@ public: void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0, bool swap = true); void UnsafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend); void SafeLoadToEAX(const Gen::OpArg & opAddress, int accessSize, s32 offset, bool signExtend); - void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true); + void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, bool swap = true, bool noProlog = false); // Trashes both inputs and EAX. 
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr); diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index babaff83a5..825e9c6558 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -217,7 +217,7 @@ void VertexLoader::CompileVertexTranslator() PanicAlert("Trying to recompile a vertex translator"); m_compiledCode = GetCodePtr(); - ABI_EmitPrologue(4); + ABI_PushAllCalleeSavedRegsAndAdjustStack(); // Start loop here const u8 *loop_start = GetCodePtr(); @@ -499,7 +499,8 @@ void VertexLoader::CompileVertexTranslator() #endif J_CC(CC_NZ, loop_start, true); - ABI_EmitEpilogue(4); + ABI_PopAllCalleeSavedRegsAndAdjustStack(); + RET(); #endif m_NativeFmt->Initialize(vtx_decl); } diff --git a/Source/Core/VideoCommon/Src/x64DLCache.cpp b/Source/Core/VideoCommon/Src/x64DLCache.cpp index ae32ea5ea9..b412c2e274 100644 --- a/Source/Core/VideoCommon/Src/x64DLCache.cpp +++ b/Source/Core/VideoCommon/Src/x64DLCache.cpp @@ -409,7 +409,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl) emitter.AlignCode4(); dl->compiled_code = emitter.GetCodePtr(); - emitter.ABI_EmitPrologue(4); + emitter.ABI_PushAllCalleeSavedRegsAndAdjustStack(); while (g_pVideoData < end) { @@ -572,7 +572,8 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl) break; } } - emitter.ABI_EmitEpilogue(4); + emitter.ABI_PopAllCalleeSavedRegsAndAdjustStack(); + emitter.RET(); INCSTAT(stats.numDListsCalled); INCSTAT(stats.thisFrame.numDListsCalled); Statistics::SwapDL();