diff --git a/Source/Core/Common/x64ABI.cpp b/Source/Core/Common/x64ABI.cpp index 45465619bd..d489301c4b 100644 --- a/Source/Core/Common/x64ABI.cpp +++ b/Source/Core/Common/x64ABI.cpp @@ -10,93 +10,86 @@ using namespace Gen; // Shared code between Win64 and Unix64 -unsigned int XEmitter::ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog) +void XEmitter::ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { - frameSize = noProlog ? 0x28 : 0; - return frameSize; -} - -void XEmitter::ABI_AlignStack(unsigned int frameSize, bool noProlog) -{ - unsigned int fillSize = ABI_GetAlignedFrameSize(frameSize, noProlog) - frameSize; - - if (fillSize != 0) - { - SUB(64, R(RSP), Imm8(fillSize)); - } -} - -void XEmitter::ABI_RestoreStack(unsigned int frameSize, bool noProlog) -{ - unsigned int alignedSize = ABI_GetAlignedFrameSize(frameSize, noProlog); - - if (alignedSize != 0) - { - ADD(64, R(RSP), Imm8(alignedSize)); - } -} - -void XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog) -{ - int regSize = 8; - int shadow = 0; + size_t shadow = 0; #if defined(_WIN32) shadow = 0x20; #endif + int count = 0; for (int r = 0; r < 16; r++) { if (mask & (1 << r)) - { - PUSH((X64Reg) r); count++; - } } - int size = ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf; + rsp_alignment -= count * 8; + size_t subtraction = 0; + if (mask & 0xffff0000) + { + // If we have any XMMs to save, we must align the stack here. + subtraction = rsp_alignment & 0xf; + } for (int x = 0; x < 16; x++) { if (mask & (1 << (16 + x))) - size += 16; - } - size += shadow; - if (size) - SUB(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); - int offset = shadow; - for (int x = 0; x < 16; x++) - { - if (mask & (1 << (16 + x))) - { - MOVUPD(MDisp(RSP, offset), (X64Reg) x); - offset += 16; - } + subtraction += 16; } + size_t xmm_base_subtraction = subtraction; + subtraction += needed_frame_size; + subtraction += shadow; + // Final alignment. + rsp_alignment -= subtraction; + subtraction += rsp_alignment & 0xf; + + *shadowp = shadow; + *subtractionp = subtraction; + *xmm_offsetp = subtraction - xmm_base_subtraction; } -void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog) +size_t XEmitter::ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size) { - int regSize = 8; - int size = 0; -#if defined(_WIN32) - size += 0x20; -#endif + size_t shadow, subtraction, xmm_offset; + ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); + + for (int r = 0; r < 16; r++) + { + if (mask & (1 << r)) + PUSH((X64Reg) r); + } + + if (subtraction) + SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); + for (int x = 0; x < 16; x++) { if (mask & (1 << (16 + x))) { - MOVUPD((X64Reg) x, MDisp(RSP, size)); - size += 16; + MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg) x); + xmm_offset += 16; } } - int count = 0; - for (int r = 0; r < 16; r++) - { - if (mask & (1 << r)) - count++; - } - size += ((noProlog ? -regSize : 0) - (count * regSize)) & 0xf; - if (size) - ADD(regSize * 8, R(RSP), size >= 0x80 ? Imm32(size) : Imm8(size)); + return shadow; +} + +void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size) +{ + size_t shadow, subtraction, xmm_offset; + ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); + + for (int x = 0; x < 16; x++) + { + if (mask & (1 << (16 + x))) + { + MOVAPD((X64Reg) x, MDisp(RSP, (int)xmm_offset)); + xmm_offset += 16; + } + } + + if (subtraction) + ADD(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); + for (int r = 15; r >= 0; r--) { if (mask & (1 << r)) @@ -109,7 +102,6 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog) // Common functions void XEmitter::ABI_CallFunction(void *func) { - ABI_AlignStack(0); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) @@ -122,12 +114,10 @@ void XEmitter::ABI_CallFunction(void *func) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionC16(void *func, u16 param1) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && @@ -141,12 +131,10 @@ void XEmitter::ABI_CallFunctionC16(void *func, u16 param1) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -161,12 +149,10 @@ void XEmitter::ABI_CallFunctionCC16(void *func, u32 param1, u16 param2) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && @@ -180,12 +166,10 @@ void XEmitter::ABI_CallFunctionC(void *func, u32 param1) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -200,12 +184,10 @@ void XEmitter::ABI_CallFunctionCC(void *func, u32 param1, u32 param2) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(64, R(ABI_PARAM2), Imm64((u64)param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -220,12 +202,10 @@ void XEmitter::ABI_CallFunctionCP(void *func, u32 param1, void *param2) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param3) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -241,12 +221,10 @@ void XEmitter::ABI_CallFunctionCCC(void *func, u32 param1, u32 param2, u32 param { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *param3) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(64, R(ABI_PARAM3), Imm64((u64)param3)); @@ -262,12 +240,10 @@ void XEmitter::ABI_CallFunctionCCP(void *func, u32 param1, u32 param2, void *par { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 param3, void *param4) { - ABI_AlignStack(0); MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -284,12 +260,10 @@ void XEmitter::ABI_CallFunctionCCCP(void *func, u32 param1, u32 param2, u32 para { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) { - ABI_AlignStack(0); MOV(64, R(ABI_PARAM1), Imm64((u64)param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); u64 distance = u64(func) - (u64(code) + 5); @@ -304,12 +278,10 @@ void XEmitter::ABI_CallFunctionPC(void *func, void *param1, u32 param2) { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 param3) { - ABI_AlignStack(0); MOV(64, R(ABI_PARAM1), Imm64((u64)param1)); MOV(64, R(ABI_PARAM2), Imm64((u64)param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); @@ -325,13 +297,11 @@ void XEmitter::ABI_CallFunctionPPC(void *func, void *param1, void *param2, u32 p { CALL(func); } - ABI_RestoreStack(0); } // Pass a register as a parameter. void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { - ABI_AlignStack(0); if (reg1 != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R(reg1)); u64 distance = u64(func) - (u64(code) + 5); @@ -346,13 +316,11 @@ void XEmitter::ABI_CallFunctionR(void *func, X64Reg reg1) { CALL(func); } - ABI_RestoreStack(0); } // Pass two registers as parameters. -void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog) +void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2) { - ABI_AlignStack(0, noProlog); MOVTwo(64, ABI_PARAM1, reg1, ABI_PARAM2, reg2, ABI_PARAM3); u64 distance = u64(func) - (u64(code) + 5); if (distance >= 0x0000000080000000ULL && @@ -366,7 +334,6 @@ void XEmitter::ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noP { CALL(func); } - ABI_RestoreStack(0, noProlog); } void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, X64Reg temp) @@ -395,7 +362,6 @@ void XEmitter::MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2) { - ABI_AlignStack(0); if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); MOV(32, R(ABI_PARAM2), Imm32(param2)); @@ -411,12 +377,10 @@ void XEmitter::ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2 { CALL(func); } - ABI_RestoreStack(0); } void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) { - ABI_AlignStack(0); if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); u64 distance = u64(func) - (u64(code) + 5); @@ -431,66 +395,5 @@ void XEmitter::ABI_CallFunctionA(void *func, const Gen::OpArg &arg1) { CALL(func); } - ABI_RestoreStack(0); } -#ifdef _WIN32 -// Win64 Specific Code - -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() -{ - //we only want to do this once - PUSH(RBP); - MOV(64, R(RBP), R(RSP)); - PUSH(RBX); - PUSH(RSI); - PUSH(RDI); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - SUB(64, R(RSP), Imm8(0x28)); - //TODO: Also preserve XMM0-3? -} - -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() -{ - ADD(64, R(RSP), Imm8(0x28)); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RDI); - POP(RSI); - POP(RBX); - POP(RBP); -} - -#else -// Unix64 Specific Code - -void XEmitter::ABI_PushAllCalleeSavedRegsAndAdjustStack() -{ - PUSH(RBP); - MOV(64, R(RBP), R(RSP)); - PUSH(RBX); - PUSH(R12); - PUSH(R13); - PUSH(R14); - PUSH(R15); - SUB(64, R(RSP), Imm8(8)); -} - -void XEmitter::ABI_PopAllCalleeSavedRegsAndAdjustStack() -{ - ADD(64, R(RSP), Imm8(8)); - POP(R15); - POP(R14); - POP(R13); - POP(R12); - POP(RBX); - POP(RBP); -} - -#endif // WIN32 - diff --git a/Source/Core/Common/x64ABI.h b/Source/Core/Common/x64ABI.h index abc9236ef7..bf058bc04a 100644 --- a/Source/Core/Common/x64ABI.h +++ b/Source/Core/Common/x64ABI.h @@ -53,5 +53,7 @@ #endif // WIN32 +#define ABI_ALL_CALLEE_SAVED ((u32) ~ABI_ALL_CALLER_SAVED) + #define ABI_RETURN RAX diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index d6f0699e84..f229c1a1c3 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -281,6 +281,8 @@ private: void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); + void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); + protected: inline void Write8(u8 value) {*code++ = value;} inline void Write16(u16 value) {*(u16*)code = (value); code += 2;} @@ -751,23 +753,16 @@ public: // Pass a register as a parameter. void ABI_CallFunctionR(void *func, X64Reg reg1); - void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2, bool noProlog = false); + void ABI_CallFunctionRR(void *func, X64Reg reg1, X64Reg reg2); // Helper method for the above, or can be used separately. void MOVTwo(int bits, Gen::X64Reg dst1, Gen::X64Reg src1, Gen::X64Reg dst2, Gen::X64Reg src2, Gen::X64Reg temp); - // A function that doesn't have any control over what it will do to regs, - // such as the dispatcher, should be surrounded by these. - void ABI_PushAllCalleeSavedRegsAndAdjustStack(); - void ABI_PopAllCalleeSavedRegsAndAdjustStack(); - - // A more flexible version of the above. - void ABI_PushRegistersAndAdjustStack(u32 mask, bool noProlog); - void ABI_PopRegistersAndAdjustStack(u32 mask, bool noProlog); - - unsigned int ABI_GetAlignedFrameSize(unsigned int frameSize, bool noProlog = false); - void ABI_AlignStack(unsigned int frameSize, bool noProlog = false); - void ABI_RestoreStack(unsigned int frameSize, bool noProlog = false); + // Saves/restores the registers and adjusts the stack to be aligned as + // required by the ABI, where the previous alignment was as specified. + // Push returns the size of the shadow space, i.e. the offset of the frame. + size_t ABI_PushRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); + void ABI_PopRegistersAndAdjustStack(u32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); inline int ABI_GetNumXMMRegs() { return 16; } diff --git a/Source/Core/Core/DSP/DSPEmitter.cpp b/Source/Core/Core/DSP/DSPEmitter.cpp index a7eec8a17e..188dfcaf2c 100644 --- a/Source/Core/Core/DSP/DSPEmitter.cpp +++ b/Source/Core/Core/DSP/DSPEmitter.cpp @@ -384,7 +384,9 @@ const u8 *DSPEmitter::CompileStub() void DSPEmitter::CompileDispatcher() { enterDispatcher = AlignCode16(); - ABI_PushAllCalleeSavedRegsAndAdjustStack(); + // We don't use floating point (high 16 bits). + u32 registers_used = ABI_ALL_CALLEE_SAVED & 0xffff; + ABI_PushRegistersAndAdjustStack(registers_used, 8); const u8 *dispatcherLoop = GetCodePtr(); @@ -419,6 +421,6 @@ void DSPEmitter::CompileDispatcher() SetJumpTarget(exceptionExit); } //MOV(32, M(&cyclesLeft), Imm32(0)); - ABI_PopAllCalleeSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(registers_used, 8); RET(); } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index a3707dbbe1..a69d8e0f82 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -495,9 +495,9 @@ const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBloc js.fifoBytesThisBlock -= 32; MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write u32 registersInUse = CallerSavedRegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunction((void *)&GPFifo::CheckGatherPipe); - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); } u32 function = HLE::GetFunctionIndex(ops[i].address); diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index 1c5b78666f..40eb726db7 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -16,7 +16,7 @@ using namespace Gen; void Jit64AsmRoutineManager::Generate() { enterCode = AlignCode16(); - ABI_PushAllCalleeSavedRegsAndAdjustStack(); + ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); // Two statically allocated registers. MOV(64, R(RMEM), Imm64((u64)Memory::base)); @@ -39,7 +39,7 @@ void Jit64AsmRoutineManager::Generate() ABI_CallFunction(reinterpret_cast(&PowerPC::CheckBreakPoints)); TEST(32, M((void*)PowerPC::GetStatePtr()), Imm32(0xFFFFFFFF)); FixupBranch noBreakpoint = J_CC(CC_Z); - ABI_PopAllCalleeSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); RET(); SetJumpTarget(noBreakpoint); SetJumpTarget(notStepping); @@ -126,7 +126,7 @@ void Jit64AsmRoutineManager::Generate() J_CC(CC_Z, outerLoop); //Landing pad for drec space - ABI_PopAllCalleeSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); RET(); GenerateCommon(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index ba9cf8b293..0c25191736 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -116,11 +116,11 @@ void Jit64::lXXx(UGeckoInstruction inst) FixupBranch noIdle = J_CC(CC_NZ); u32 registersInUse = CallerSavedRegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunctionC((void *)&PowerPC::OnIdle, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); // ! we must continue executing of the loop after exception handling, maybe there is still 0 in r0 //MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); @@ -285,9 +285,9 @@ void Jit64::dcbz(UGeckoInstruction inst) // supposedly there are, at least for some MMU titles. Let's be careful and support it to be sure. MOV(32, M(&PC), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunctionR((void *)&Memory::ClearCacheLine, RSCRATCH); - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); FixupBranch exit = J(); SetJumpTarget(fast); @@ -374,7 +374,7 @@ void Jit64::stX(UGeckoInstruction inst) MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); u32 registersInUse = CallerSavedRegistersInUse(); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); switch (accessSize) { case 32: @@ -387,7 +387,7 @@ void Jit64::stX(UGeckoInstruction inst) ABI_CallFunctionAC((void *)&Memory::Write_U8, gpr.R(s), addr); break; } - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); if (update) gpr.SetImmediate32(a, addr); return; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 2630395630..dd751c27bf 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -87,9 +87,7 @@ void Jit64::psq_l(UGeckoInstruction inst) if (inst.W) OR(32, R(RSCRATCH2), Imm8(8)); - ABI_AlignStack(0); CALLptr(MScaled(RSCRATCH2, SCALE_8, (u32)(u64)asm_routines.pairedLoadQuantized)); - ABI_RestoreStack(0); // MEMCHECK_START // FIXME: MMU does not work here because of unsafe memory access diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index d5cce9882e..dbce5dfb85 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -110,9 +110,9 @@ void CommonAsmRoutines::GenFrsqrte() SetJumpTarget(complex1); SetJumpTarget(complex2); SetJumpTarget(complex3); - ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); ABI_CallFunction((void *)&MathUtil::ApproximateReciprocalSquareRoot); - ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); RET(); } @@ -169,9 +169,9 @@ void CommonAsmRoutines::GenFres() SetJumpTarget(complex1); SetJumpTarget(complex2); - ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false); + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); ABI_CallFunction((void *)&MathUtil::ApproximateReciprocal); - ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, false); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); RET(); } @@ -258,9 +258,10 @@ void CommonAsmRoutines::GenQuantizedStores() SwapAndStore(64, MComplex(RMEM, RSCRATCH_EXTRA, SCALE_1, 0), RSCRATCH); FixupBranch skip_complex = J(true); SetJumpTarget(too_complex); - ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); + // RSP alignment here is 8 due to the call. + ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); ABI_CallFunctionR((void *)&WriteDual32, RSCRATCH_EXTRA); - ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, true); + ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8); SetJumpTarget(skip_complex); RET(); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index c1a6436e62..26b8b50d60 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -56,10 +56,8 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re X64Reg dataReg = (X64Reg)info.regOperandReg; // It's a read. Easy. - // It ought to be necessary to align the stack here. Since it seems to not - // affect anybody, I'm not going to add it just to be completely safe about - // performance. - ABI_PushRegistersAndAdjustStack(registersInUse, true); + // RSP alignment here is 8 due to the call. + ABI_PushRegistersAndAdjustStack(registersInUse, 8); if (addrReg != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg)); @@ -91,7 +89,7 @@ const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info, u32 re MOV(32, R(dataReg), R(ABI_RETURN)); } - ABI_PopRegistersAndAdjustStack(registersInUse, true); + ABI_PopRegistersAndAdjustStack(registersInUse, 8); RET(); return trampoline; } @@ -115,7 +113,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs MOV(32, PPCSTATE(pc), Imm32(pc)); - ABI_PushRegistersAndAdjustStack(registersInUse, true); + ABI_PushRegistersAndAdjustStack(registersInUse, 8); MOVTwo(64, ABI_PARAM1, dataReg, ABI_PARAM2, addrReg, ABI_PARAM3); @@ -140,7 +138,7 @@ const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info, u32 r break; } - ABI_PopRegistersAndAdjustStack(registersInUse, true); + ABI_PopRegistersAndAdjustStack(registersInUse, 8); RET(); return trampoline; diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index be43680e88..6b80fd853d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -204,9 +204,9 @@ private: void CallLambda(int sbits, const std::function* lambda) { - m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, false); + m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, 0); m_code->ABI_CallLambdaC(lambda, m_address); - m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, false); + m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, 0); MoveOpArgToReg(sbits, R(ABI_RETURN)); } @@ -305,7 +305,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, } else { - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); switch (accessSize) { case 64: ABI_CallFunctionC((void *)&Memory::Read_U64, address); break; @@ -313,7 +313,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break; case 8: ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break; } - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); MEMCHECK_START @@ -350,7 +350,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, FixupBranch fast = J_CC(CC_Z, true); - ABI_PushRegistersAndAdjustStack(registersInUse, false); + ABI_PushRegistersAndAdjustStack(registersInUse, 0); switch (accessSize) { case 64: @@ -366,7 +366,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); break; } - ABI_PopRegistersAndAdjustStack(registersInUse, false); + ABI_PopRegistersAndAdjustStack(registersInUse, 0); MEMCHECK_START @@ -470,25 +470,25 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce FixupBranch fast = J_CC(CC_Z, true); // PC is used by memory watchpoints (if enabled) or to print accurate PC locations in debug logs MOV(32, PPCSTATE(pc), Imm32(jit->js.compilerPC)); - bool noProlog = (0 != (flags & SAFE_LOADSTORE_NO_PROLOG)); + size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0; bool swap = !(flags & SAFE_LOADSTORE_NO_SWAP); - ABI_PushRegistersAndAdjustStack(registersInUse, noProlog); + ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment); switch (accessSize) { case 64: - ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr); break; case 32: - ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr); break; case 16: - ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); + ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr); break; case 8: - ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); + ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr); break; } - ABI_PopRegistersAndAdjustStack(registersInUse, noProlog); + ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment); FixupBranch exit = J(); SetJumpTarget(fast); UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0, swap); diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index a227b6da52..3f509e8d32 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -584,7 +584,8 @@ void VertexLoader::CompileVertexTranslator() PanicAlert("Trying to recompile a vertex translator"); m_compiledCode = GetCodePtr(); - ABI_PushAllCalleeSavedRegsAndAdjustStack(); + // We don't use any callee saved registers or anything but RAX. + ABI_PushRegistersAndAdjustStack(0, 8); // Start loop here const u8 *loop_start = GetCodePtr(); @@ -845,7 +846,7 @@ void VertexLoader::CompileVertexTranslator() SUB(32, MatR(RAX), Imm8(1)); J_CC(CC_NZ, loop_start); - ABI_PopAllCalleeSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(0, 8); RET(); #endif }