diff --git a/CMakeLists.txt b/CMakeLists.txt
index e98c445f53..e45a6a55af 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,7 @@ option(USE_UPNP "Enables UPnP port mapping support" ON)
 option(DISABLE_WX "Disable wxWidgets (use Qt or CLI interface)" OFF)
 option(ENABLE_QT "Enable Qt (use the experimental Qt interface)" OFF)
 option(ENABLE_PCH "Use PCH to speed up compilation" ON)
+option(ENABLE_PIE "Build a Position-Independent Executable (PIE)" ON)
 option(ENABLE_LTO "Enables Link Time Optimization" OFF)
 option(ENABLE_GENERIC "Enables generic build that should run on any little-endian host" OFF)
 if(APPLE)
@@ -218,6 +219,11 @@ if(UNIX AND NOT APPLE)
 	check_and_add_flag(VISIBILITY_HIDDEN -fvisibility=hidden)
 endif()
 
+if(ENABLE_PIE)
+	add_definitions(-fPIE)
+	set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie")
+endif()
+
 if(ENABLE_LTO)
 	check_and_add_flag(LTO -flto)
 	if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
diff --git a/Source/Core/Common/CodeBlock.h b/Source/Core/Common/CodeBlock.h
index 18b1af60f2..c982245912 100644
--- a/Source/Core/Common/CodeBlock.h
+++ b/Source/Core/Common/CodeBlock.h
@@ -28,10 +28,10 @@ public:
 	virtual ~CodeBlock() { if (region) FreeCodeSpace(); }
 
 	// Call this before you generate any code.
-	void AllocCodeSpace(int size)
+	void AllocCodeSpace(int size, void* hint = nullptr)
 	{
 		region_size = size;
-		region = (u8*)AllocateExecutableMemory(region_size);
+		region = (u8*)AllocateExecutableMemory(region_size, hint);
 		T::SetCodePtr(region);
 	}
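[Note] The `-fPIE`/`-pie` flags above are the GCC/Clang spelling for ELF targets. For reference, a minimal sketch of the same intent using CMake's built-in PIC/PIE handling; this assumes a newer CMake than the patch targets (`check_pie_supported()` needs CMake 3.14+), so it is an alternative, not part of the patch:

```cmake
if(ENABLE_PIE)
  set(CMAKE_POSITION_INDEPENDENT_CODE ON)  # adds -fPIE/-fPIC to compiles
  include(CheckPIESupported)               # CMake 3.14+
  check_pie_supported()                    # pass -pie at link time where supported
endif()
```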
diff --git a/Source/Core/Common/MemoryUtil.cpp b/Source/Core/Common/MemoryUtil.cpp
index 510b258b09..e4563e3409 100644
--- a/Source/Core/Common/MemoryUtil.cpp
+++ b/Source/Core/Common/MemoryUtil.cpp
@@ -27,71 +27,29 @@
 #endif
 #endif
 
-// Valgrind doesn't support MAP_32BIT.
-// Uncomment the following line to be able to run Dolphin in Valgrind.
-//#undef MAP_32BIT
-
-#if !defined(_WIN32) && defined(_M_X86_64) && !defined(MAP_32BIT)
-#include <unistd.h>
-#define PAGE_MASK     (getpagesize() - 1)
-#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
-#endif
-
 // This is purposely not a full wrapper for virtualalloc/mmap, but it
 // provides exactly the primitive operations that Dolphin needs.
 
-void* AllocateExecutableMemory(size_t size, bool low)
+void* AllocateExecutableMemory(size_t size, void* map_hint)
 {
 #if defined(_WIN32)
 	void* ptr = VirtualAlloc(0, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
 #else
-	static char *map_hint = nullptr;
-#if defined(_M_X86_64) && !defined(MAP_32BIT)
-	// This OS has no flag to enforce allocation below the 4 GB boundary,
-	// but if we hint that we want a low address it is very likely we will
-	// get one.
-	// An older version of this code used MAP_FIXED, but that has the side
-	// effect of discarding already mapped pages that happen to be in the
-	// requested virtual memory range (such as the emulated RAM, sometimes).
-	if (low && (!map_hint))
-		map_hint = (char*)round_page(512*1024*1024); /* 0.5 GB rounded up to the next page */
-#endif
 	void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
-		MAP_ANON | MAP_PRIVATE
-#if defined(_M_X86_64) && defined(MAP_32BIT)
-		| (low ? MAP_32BIT : 0)
-#endif
-		, -1, 0);
+		MAP_ANON | MAP_PRIVATE, -1, 0);
 #endif /* defined(_WIN32) */
 
-	// printf("Mapped executable memory at %p (size %ld)\n", ptr,
-	//	(unsigned long)size);
-
 #ifdef _WIN32
 	if (ptr == nullptr)
-	{
 #else
 	if (ptr == MAP_FAILED)
+#endif
 	{
 		ptr = nullptr;
-#endif
-		PanicAlert("Failed to allocate executable memory. If you are running Dolphin in Valgrind, try '#undef MAP_32BIT'.");
+		PanicAlert("Failed to allocate executable memory.");
 	}
-#if !defined(_WIN32) && defined(_M_X86_64) && !defined(MAP_32BIT)
-	else
-	{
-		if (low)
-		{
-			map_hint += size;
-			map_hint = (char*)round_page(map_hint); /* round up to the next page */
-			// printf("Next map will (hopefully) be at %p\n", map_hint);
-		}
-	}
-#endif
-#if _M_X86_64
-	if ((u64)ptr >= 0x80000000 && low == true)
-		PanicAlert("Executable memory ended up above 2GB!");
+#if defined(_M_X86_64)
+	ptrdiff_t ofs = (u8*)ptr - (u8*)map_hint;
+	if (ofs < -0x80000000ll || ofs + size > 0x80000000ll)
+		PanicAlert("Executable range can't be used for RIP-relative addressing.");
 #endif
 
 	return ptr;
@@ -117,18 +75,12 @@ void* AllocateMemoryPages(size_t size)
 
 void* AllocateAlignedMemory(size_t size, size_t alignment)
 {
-#ifdef _WIN32
-	void* ptr = _aligned_malloc(size, alignment);
-#else
 	void* ptr = nullptr;
-	if (posix_memalign(&ptr, alignment, size) != 0)
-		ERROR_LOG(MEMMAP, "Failed to allocate aligned memory");
+#ifdef _WIN32
+	if (!(ptr = _aligned_malloc(size, alignment)))
+#else
+	if (posix_memalign(&ptr, alignment, size))
 #endif
-
-	// printf("Mapped memory at %p (size %ld)\n", ptr,
-	//	(unsigned long)size);
-
-	if (ptr == nullptr)
 		PanicAlert("Failed to allocate aligned memory");
 
 	return ptr;
@@ -136,23 +88,12 @@ void* AllocateAlignedMemory(size_t size, size_t alignment)
 
 void FreeMemoryPages(void* ptr, size_t size)
 {
-	if (ptr)
-	{
-		bool error_occurred = false;
-
 #ifdef _WIN32
-		if (!VirtualFree(ptr, 0, MEM_RELEASE))
-			error_occurred = true;
+	if (ptr && !VirtualFree(ptr, 0, MEM_RELEASE))
 #else
-		int retval = munmap(ptr, size);
-
-		if (retval != 0)
-			error_occurred = true;
+	if (ptr && munmap(ptr, size))
 #endif
-
-		if (error_occurred)
-			PanicAlert("FreeMemoryPages failed!\n%s", GetLastErrorMsg().c_str());
-	}
+		PanicAlert("FreeMemoryPages failed!\n%s", GetLastErrorMsg().c_str());
 }
 
 void FreeAlignedMemory(void* ptr)
@@ -169,58 +110,34 @@ void FreeAlignedMemory(void* ptr)
 
 void ReadProtectMemory(void* ptr, size_t size)
 {
-	bool error_occurred = false;
-
 #ifdef _WIN32
 	DWORD oldValue;
 	if (!VirtualProtect(ptr, size, PAGE_NOACCESS, &oldValue))
-		error_occurred = true;
 #else
-	int retval = mprotect(ptr, size, PROT_NONE);
-
-	if (retval != 0)
-		error_occurred = true;
+	if (mprotect(ptr, size, PROT_NONE))
 #endif
-
-	if (error_occurred)
 		PanicAlert("ReadProtectMemory failed!\n%s", GetLastErrorMsg().c_str());
 }
 
 void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
 {
-	bool error_occurred = false;
-
 #ifdef _WIN32
 	DWORD oldValue;
 	if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldValue))
-		error_occurred = true;
 #else
-	int retval = mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_EXEC) : PROT_READ);
-
-	if (retval != 0)
-		error_occurred = true;
+	if (mprotect(ptr, size, PROT_READ | (allowExecute ? PROT_EXEC : 0)))
 #endif
-
-	if (error_occurred)
 		PanicAlert("WriteProtectMemory failed!\n%s", GetLastErrorMsg().c_str());
 }
 
 void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute)
 {
-	bool error_occurred = false;
-
 #ifdef _WIN32
 	DWORD oldValue;
 	if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldValue))
-		error_occurred = true;
 #else
-	int retval = mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_WRITE | PROT_EXEC) : PROT_WRITE | PROT_READ);
-
-	if (retval != 0)
-		error_occurred = true;
+	if (mprotect(ptr, size, PROT_READ | PROT_WRITE | (allowExecute ? PROT_EXEC : 0)))
 #endif
-
-	if (error_occurred)
 		PanicAlert("UnWriteProtectMemory failed!\n%s", GetLastErrorMsg().c_str());
 }
 
@@ -236,7 +153,8 @@ std::string MemUsage()
 	// Print information about the memory usage of the process.
 
 	hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, processID);
-	if (nullptr == hProcess) return "MemUsage Error";
+	if (nullptr == hProcess)
+		return "MemUsage Error";
 
 	if (GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc)))
 		Ret = StringFromFormat("%s K", ThousandSeparate(pmc.WorkingSetSize / 1024, 7).c_str());
diff --git a/Source/Core/Common/MemoryUtil.h b/Source/Core/Common/MemoryUtil.h
index e986069d24..248937611d 100644
--- a/Source/Core/Common/MemoryUtil.h
+++ b/Source/Core/Common/MemoryUtil.h
@@ -7,7 +7,7 @@
 #include <cstddef>
 #include <string>
 
-void* AllocateExecutableMemory(size_t size, bool low = true);
+void* AllocateExecutableMemory(size_t size, void* map_hint);
 void* AllocateMemoryPages(size_t size);
 void FreeMemoryPages(void* ptr, size_t size);
 void* AllocateAlignedMemory(size_t size,size_t alignment);
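[Note] The old "low" allocator tried to keep JIT regions below 2 GiB absolute; the new one only requires that every byte of the region be reachable from the caller's hint with a signed 32-bit (rel32) displacement. A minimal standalone sketch of that reachability test; the helper name is illustrative, not part of the patch:

```cpp
#include <cstddef>
#include <cstdint>

// rel32 displacements are signed 32-bit: every byte of [ptr, ptr + size)
// must lie within [base - 2GiB, base + 2GiB).
static bool CheckRip32Reachable(const void* base, const void* ptr, size_t size)
{
	ptrdiff_t ofs = (const uint8_t*)ptr - (const uint8_t*)base;
	return ofs >= -0x80000000ll && ofs + (ptrdiff_t)size <= 0x80000000ll;
}
```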
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index 334ba66429..24331c2db1 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -218,17 +218,17 @@ inline OpArg M(const T* ptr)    {return OpArg((u64)(const void*)ptr, (int)SCALE_RIP);}
 inline OpArg R(X64Reg value)    {return OpArg(0, SCALE_NONE, value);}
 inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}
 
-inline OpArg MDisp(X64Reg value, int offset)
+inline OpArg MDisp(X64Reg value, ptrdiff_t offset)
 {
-	return OpArg((u32)offset, SCALE_ATREG, value);
+	return OpArg(offset, SCALE_ATREG, value);
 }
 
-inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
+inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, ptrdiff_t offset)
 {
 	return OpArg(offset, scale, base, scaled);
 }
 
-inline OpArg MScaled(X64Reg scaled, int scale, int offset)
+inline OpArg MScaled(X64Reg scaled, int scale, ptrdiff_t offset)
 {
 	if (scale == SCALE_1)
 		return OpArg(offset, SCALE_ATREG, scaled);
@@ -247,17 +247,10 @@ inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
 inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
 inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);}
 
-inline u32 PtrOffset(const void* ptr, const void* base)
+inline bool FitsInS32(const ptrdiff_t distance)
 {
-	s64 distance = (s64)ptr-(s64)base;
-	if (distance >= 0x80000000LL ||
-	    distance < -0x80000000LL)
-	{
-		_assert_msg_(DYNA_REC, 0, "pointer offset out of range");
-		return 0;
-	}
-
-	return (u32)distance;
+	return distance <  0x80000000LL &&
+	       distance >= -0x80000000LL;
 }
 
 //usage: int a[]; ARRAY_OFFSET(a,10)
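[Note] `FitsInS32` inverts the old contract: instead of `PtrOffset` asserting at emit time that an absolute address squeezes into 32 bits, callers test whether a *displacement* fits and fall back to materializing the full 64-bit pointer. A sketch of the caller-side idiom, mirroring the JitAsm.cpp changes below (`table` is a stand-in; `PPCSTATE_OFS` and `MPIC` are introduced later in this patch):

```cpp
ptrdiff_t ofs = PPCSTATE_OFS(table);            // displacement from RPPCSTATE
if (FitsInS32(ofs))
{
	MOV(32, R(RSCRATCH), MComplex(RPPCSTATE, RSCRATCH, SCALE_4, ofs));
}
else
{
	MOV(64, R(RSCRATCH2), ImmPtr(table));       // full 64-bit address, always valid
	MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_4, 0));
}
```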
diff --git a/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp b/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp
index 205ef6e4c8..0ec62f9e95 100644
--- a/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp
+++ b/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp
@@ -20,14 +20,16 @@ void DSPEmitter::dsp_reg_stack_push(int stack_reg)
 	AND(8, R(AL), Imm8(DSP_STACK_MASK));
 	MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL));
 
-	X64Reg tmp1;
+	X64Reg tmp1, tmp2;
 	gpr.getFreeXReg(tmp1);
+	gpr.getFreeXReg(tmp2);
 	//g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg];
 	MOV(16, R(tmp1), M(&g_dsp.r.st[stack_reg]));
 	MOVZX(64, 8, RAX, R(AL));
-	MOV(16, MComplex(EAX, EAX, 1,
-			 PtrOffset(&g_dsp.reg_stack[stack_reg][0], nullptr)), R(tmp1));
+	MOV(64, R(tmp2), ImmPtr(g_dsp.reg_stack[stack_reg]));
+	MOV(16, MComplex(tmp2, EAX, SCALE_2, 0), R(tmp1));
 	gpr.putXReg(tmp1);
+	gpr.putXReg(tmp2);
 }
 
 //clobbers:
@@ -37,13 +39,15 @@ void DSPEmitter::dsp_reg_stack_pop(int stack_reg)
 {
 	//g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]];
 	MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg]));
-	X64Reg tmp1;
+	X64Reg tmp1, tmp2;
 	gpr.getFreeXReg(tmp1);
+	gpr.getFreeXReg(tmp2);
 	MOVZX(64, 8, RAX, R(AL));
-	MOV(16, R(tmp1), MComplex(EAX, EAX, 1,
-				  PtrOffset(&g_dsp.reg_stack[stack_reg][0], nullptr)));
+	MOV(64, R(tmp2), ImmPtr(g_dsp.reg_stack[stack_reg]));
+	MOV(16, R(tmp1), MComplex(tmp2, EAX, SCALE_2, 0));
 	MOV(16, M(&g_dsp.r.st[stack_reg]), R(tmp1));
 	gpr.putXReg(tmp1);
+	gpr.putXReg(tmp2);
 
 	//g_dsp.reg_stack_ptr[stack_reg]--;
 	//g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK;
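[Note] The DSP change shows the general rewrite pattern used throughout this patch. The old code scaled the index by adding it to itself (`EAX + EAX*1`) so the u16 array's *absolute address* could ride in the displacement; that only works if the array sits in the low 2 GiB. A sketch of the before/after (`table` stands in for `g_dsp.reg_stack[stack_reg]`):

```cpp
// Before: absolute address folded into a 32-bit displacement (breaks under PIE):
//   MOV(16, MComplex(EAX, EAX, 1, (u32)(u64)table), R(tmp1));
// After: materialize the base in a scratch register, keep the displacement zero:
MOV(64, R(tmp2), ImmPtr(table));                   // 64-bit absolute load, always valid
MOV(16, MComplex(tmp2, EAX, SCALE_2, 0), R(tmp1)); // base + index*2 (u16 elements)
```

The cost is one extra register and one extra instruction, which is why the hotter PowerPC JIT paths below prefer the RPPCSTATE-relative `MPIC` form when the displacement fits.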
diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
index fa4acdc922..f194098bbc 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -40,7 +40,7 @@ void Jit64AsmRoutineManager::Generate()
 
 	// Two statically allocated registers.
 	//MOV(64, R(RMEM), Imm64((u64)Memory::physical_base));
-	MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80));
+	MOV(64, R(RPPCSTATE), ImmPtr(PPCSTATE_BASE));
 
 	const u8* outerLoop = GetCodePtr();
 		ABI_PushRegistersAndAdjustStack({}, 0);
@@ -103,9 +103,9 @@ void Jit64AsmRoutineManager::Generate()
 				// optimizations safe, because IR and DR are usually set/cleared together.
 				// TODO: Branching based on the 20 most significant bits of instruction
 				// addresses without translating them is wrong.
-				u64 icache = (u64)jit->GetBlockCache()->iCache.data();
-				u64 icacheVmem = (u64)jit->GetBlockCache()->iCacheVMEM.data();
-				u64 icacheEx = (u64)jit->GetBlockCache()->iCacheEx.data();
+				u8* icache = jit->GetBlockCache()->iCache.data();
+				u8* icacheVmem = jit->GetBlockCache()->iCacheVMEM.data();
+				u8* icacheEx = jit->GetBlockCache()->iCacheEx.data();
 				u32 mask = 0;
 				FixupBranch no_mem;
 				FixupBranch exit_mem;
@@ -117,13 +117,13 @@ void Jit64AsmRoutineManager::Generate()
 					no_mem = J_CC(CC_NZ);
 				AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
 
-				if (icache <= INT_MAX)
+				if (FitsInS32(PPCSTATE_OFS(icache)))
 				{
-					MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icache));
+					MOV(32, R(RSCRATCH), MPIC(icache, RSCRATCH));
 				}
 				else
 				{
-					MOV(64, R(RSCRATCH2), Imm64(icache));
+					MOV(64, R(RSCRATCH2), ImmPtr(icache));
 					MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 				}
 
@@ -132,13 +132,14 @@ void Jit64AsmRoutineManager::Generate()
 				TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
 				FixupBranch no_vmem = J_CC(CC_Z);
 				AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
-				if (icacheVmem <= INT_MAX)
+
+				if (FitsInS32(PPCSTATE_OFS(icacheVmem)))
 				{
-					MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheVmem));
+					MOV(32, R(RSCRATCH), MPIC(icacheVmem, RSCRATCH));
 				}
 				else
 				{
-					MOV(64, R(RSCRATCH2), Imm64(icacheVmem));
+					MOV(64, R(RSCRATCH2), ImmPtr(icacheVmem));
 					MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 				}
 
@@ -149,14 +150,13 @@ void Jit64AsmRoutineManager::Generate()
 				TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
 				FixupBranch no_exram = J_CC(CC_Z);
 				AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));
-
-				if (icacheEx <= INT_MAX)
+				if (FitsInS32(PPCSTATE_OFS(icacheEx)))
 				{
-					MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheEx));
+					MOV(32, R(RSCRATCH), MPIC(icacheEx, RSCRATCH));
 				}
 				else
 				{
-					MOV(64, R(RSCRATCH2), Imm64(icacheEx));
+					MOV(64, R(RSCRATCH2), ImmPtr(icacheEx));
 					MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 				}
 
@@ -169,16 +169,17 @@
 			TEST(32, R(RSCRATCH), R(RSCRATCH));
 			FixupBranch notfound = J_CC(CC_L);
 			//grab from list and jump to it
-			u64 codePointers = (u64)jit->GetBlockCache()->GetCodePointers();
-			if (codePointers <= INT_MAX)
+			const u8** codePointers = jit->GetBlockCache()->GetCodePointers();
+			if (FitsInS32(PPCSTATE_OFS(codePointers)))
 			{
-				JMPptr(MScaled(RSCRATCH, SCALE_8, (s32)codePointers));
+				JMPptr(MPIC(codePointers, RSCRATCH, SCALE_8));
 			}
 			else
 			{
-				MOV(64, R(RSCRATCH2), Imm64(codePointers));
+				MOV(64, R(RSCRATCH2), ImmPtr(codePointers));
 				JMPptr(MComplex(RSCRATCH2, RSCRATCH, SCALE_8, 0));
 			}
+
 			SetJumpTarget(notfound);
 
 			//Ok, no block, let's jit
@@ -271,7 +272,7 @@ void Jit64AsmRoutineManager::GenerateCommon()
 	CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
 	FixupBranch skip_fast_write = J_CC(CC_NE, false);
 	MOV(32, R(RSCRATCH), M(&m_gatherPipeCount));
-	MOV(8, MDisp(RSCRATCH, (u32)&m_gatherPipe), ABI_PARAM1);
+	MOV(8, MPIC(&m_gatherPipe, RSCRATCH), ABI_PARAM1);
 	ADD(32, M(&m_gatherPipeCount), Imm8(1));
 	RET();
 	SetJumpTarget(skip_fast_write);
diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h
index 8c33c8bace..6b0ae28ebe 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h
@@ -34,7 +34,7 @@ public:
 		m_stack_top = stack_top;
 		// NOTE: When making large additions to the AsmCommon code, you might
 		// want to ensure this number is big enough.
-		AllocCodeSpace(16384);
+		AllocCodeSpace(16384, PPCSTATE_BASE);
 		Generate();
 		WriteProtect();
 	}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index 407a39b867..00d8fd52b3 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -45,7 +45,7 @@ void Jit64::GenerateOverflow()
 	//rare).
 	static const u8 ovtable[4] = {0, 0, XER_SO_MASK, XER_SO_MASK};
 	MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
-	MOV(8, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)ovtable));
+	MOV(8, R(RSCRATCH), MPIC(ovtable, RSCRATCH));
 	MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
 	SetJumpTarget(exit);
 }
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index 4d875323eb..824e7f96e6 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -132,13 +132,13 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
 	{
 		// One value
 		CVTSD2SS(XMM0, fpr.R(s));
-		CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
+		CALLptr(MPIC(asm_routines.singleStoreQuantized, RSCRATCH, SCALE_8));
 	}
 	else
 	{
 		// Pair of values
 		CVTPD2PS(XMM0, fpr.R(s));
-		CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
+		CALLptr(MPIC(asm_routines.pairedStoreQuantized, RSCRATCH, SCALE_8));
 	}
 
 	if (update && jo.memcheck)
@@ -306,7 +306,7 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
 	AND(32, R(RSCRATCH2), gqr);
 	MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
 
-	CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8])));
+	CALLptr(MPIC(&asm_routines.pairedLoadQuantized[w * 8], RSCRATCH, SCALE_8));
 
 	MemoryExceptionCheck();
 	CVTPS2PD(fpr.RX(s), R(XMM0));
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
index d7c56fbe10..e798237311 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
@@ -458,7 +458,7 @@ void Jit64::mtcrf(UGeckoInstruction inst)
 					SHR(32, R(RSCRATCH), Imm8(28 - (i * 4)));
 					if (i != 0)
 						AND(32, R(RSCRATCH), Imm8(0xF));
-					MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
+					MOV(64, R(RSCRATCH), MPIC(m_crTable, RSCRATCH, SCALE_8));
 					MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
 				}
 			}
@@ -493,7 +493,7 @@ void Jit64::mcrxr(UGeckoInstruction inst)
 
 	// [SO OV CA 0] << 3
 	SHL(32, R(RSCRATCH), Imm8(4));
-	MOV(64, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)m_crTable));
+	MOV(64, R(RSCRATCH), MPIC(m_crTable, RSCRATCH));
 	MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
 
 	// Clear XER[0-3]
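[Note] The recurring deletion of `(u32)(u64)table` is the heart of the patch: a non-PIE build could assume static tables lived below 4 GiB and truncate their addresses into 32-bit displacements. Under PIE with ASLR, the image can be mapped anywhere in the 64-bit address space, so the truncation silently encodes the wrong address. A small illustrative program (not part of the patch) showing the hazard:

```cpp
#include <cstdint>
#include <cstdio>

static const uint64_t example_table[16] = {}; // stand-in for m_crTable etc.

int main()
{
	uint64_t addr = (uint64_t)&example_table[0];
	uint32_t truncated = (uint32_t)addr; // what MDisp/MScaled used to encode
	// With ASLR the image base frequently sits above 4 GiB, so this fails:
	printf("address %#llx truncates to %#x (%s)\n",
	       (unsigned long long)addr, truncated,
	       addr == truncated ? "ok" : "WRONG");
	return 0;
}
```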
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index 64adf9b4de..d7fb70f5fe 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -24,10 +24,8 @@ void CommonAsmRoutines::GenFifoWrite(int size)
 	const void* start = GetCodePtr();
 
 	// Assume value in RSCRATCH
-	u32 gather_pipe = (u32)(u64)GPFifo::m_gatherPipe;
-	_assert_msg_(DYNA_REC, gather_pipe <= 0x7FFFFFFF, "Gather pipe not in low 2GB of memory!");
 	MOV(32, R(RSCRATCH2), M(&GPFifo::m_gatherPipeCount));
-	SwapAndStore(size, MDisp(RSCRATCH2, gather_pipe), RSCRATCH);
+	SwapAndStore(size, MPIC(GPFifo::m_gatherPipe, RSCRATCH2), RSCRATCH);
 	ADD(32, R(RSCRATCH2), Imm8(size >> 3));
 	MOV(32, M(&GPFifo::m_gatherPipeCount), R(RSCRATCH2));
 	RET();
@@ -68,8 +66,8 @@ void CommonAsmRoutines::GenFrsqrte()
 	SHR(64, R(RSCRATCH), Imm8(37));
 	AND(32, R(RSCRATCH), Imm32(0x7FF));
-	IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec));
-	MOV(32, R(RSCRATCH_EXTRA), MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base));
+	IMUL(32, RSCRATCH, MPIC(MathUtil::frsqrte_expected_dec, RSCRATCH_EXTRA, SCALE_4));
+	MOV(32, R(RSCRATCH_EXTRA), MPIC(MathUtil::frsqrte_expected_base, RSCRATCH_EXTRA, SCALE_4));
 	SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
 	SHL(64, R(RSCRATCH_EXTRA), Imm8(26));
 	OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26;
@@ -136,11 +134,11 @@ void CommonAsmRoutines::GenFres()
 	AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024
 	AND(32, R(RSCRATCH2), Imm8(0x1F));  // i / 1024
 
-	IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec));
+	IMUL(32, RSCRATCH, MPIC(MathUtil::fres_expected_dec, RSCRATCH2, SCALE_4));
 	ADD(32, R(RSCRATCH), Imm8(1));
 	SHR(32, R(RSCRATCH), Imm8(1));
 
-	MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_base));
+	MOV(32, R(RSCRATCH2), MPIC(MathUtil::fres_expected_base, RSCRATCH2, SCALE_4));
 	SUB(32, R(RSCRATCH2), R(RSCRATCH));
 	SHL(64, R(RSCRATCH2), Imm8(29));
 	OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29
@@ -199,7 +197,7 @@ void CommonAsmRoutines::GenMfcr()
 		// SO: Bit 61 set; set flag bit 0
 		// LT: Bit 62 set; set flag bit 3
 		SHR(64, R(cr_val), Imm8(61));
-		OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable));
+		OR(32, R(dst), MPIC(m_flagTable, cr_val, SCALE_4));
 	}
 	RET();
@@ -247,7 +245,7 @@ void CommonAsmRoutines::GenQuantizedStores()
 
 	const u8* storePairedU8 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 #ifdef QUANTIZE_OVERFLOW_SAFE
 	MINPS(XMM0, M(m_65535));
@@ -262,7 +260,7 @@ void CommonAsmRoutines::GenQuantizedStores()
 
 	const u8* storePairedS8 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 #ifdef QUANTIZE_OVERFLOW_SAFE
 	MINPS(XMM0, M(m_65535));
@@ -278,7 +276,7 @@ void CommonAsmRoutines::GenQuantizedStores()
 
 	const u8* storePairedU16 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 
 	if (cpu_info.bSSE4_1)
@@ -310,7 +308,7 @@ void CommonAsmRoutines::GenQuantizedStores()
 
 	const u8* storePairedS16 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 #ifdef QUANTIZE_OVERFLOW_SAFE
 	MINPS(XMM0, M(m_65535));
@@ -355,7 +353,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
 
 	const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
 	XORPS(XMM1, R(XMM1));
 	MAXSS(XMM0, R(XMM1));
 	MINSS(XMM0, M(&m_255));
@@ -365,7 +363,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
 
 	const u8* storeSingleS8 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
 	MAXSS(XMM0, M(&m_m128));
 	MINSS(XMM0, M(&m_127));
 	CVTTSS2SI(RSCRATCH, R(XMM0));
@@ -374,7 +372,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
 
 	const u8* storeSingleU16 = AlignCode4(); // Used by MKWii
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
 	XORPS(XMM1, R(XMM1));
 	MAXSS(XMM0, R(XMM1));
 	MINSS(XMM0, M(m_65535));
@@ -384,7 +382,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
 
 	const u8* storeSingleS16 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
+	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
 	MAXSS(XMM0, M(&m_m32768));
 	MINSS(XMM0, M(&m_32767));
 	CVTTSS2SI(RSCRATCH, R(XMM0));
@@ -484,7 +482,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 	RET();
 
@@ -495,7 +493,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
 
@@ -523,7 +521,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 	RET();
 
@@ -534,7 +532,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0, true);
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
 
@@ -557,7 +555,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 	RET();
 
@@ -568,7 +566,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, false);
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
 
@@ -590,7 +588,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
 	MULPS(XMM0, R(XMM1));
 	RET();
 
@@ -601,7 +599,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, true);
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
+	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
 
diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
index 17007be31f..51763de8a9 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
@@ -1611,7 +1611,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
 			Jit->OR(32, R(RSCRATCH), Imm8(w << 3));
 
 			Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp1(I)));
-			Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(Jit->asm_routines.pairedLoadQuantized)));
+			Jit->CALLptr(MPIC(Jit->asm_routines.pairedLoadQuantized, RSCRATCH, SCALE_8));
 			Jit->MOVAPD(reg, R(XMM0));
 			RI.fregs[reg] = I;
 			regNormalRegClear(RI, I);
@@ -1669,7 +1669,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
 
 			Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp2(I)));
 			Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
-			Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(Jit->asm_routines.pairedStoreQuantized)));
+			Jit->CALLptr(MPIC(Jit->asm_routines.pairedStoreQuantized, RSCRATCH, SCALE_8));
 			if (RI.IInfo[I - RI.FirstI] & 4)
 				fregClearInst(RI, getOp1(I));
 			if (RI.IInfo[I - RI.FirstI] & 8)
diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
index 1edb0591bc..aea7d9446f 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
@@ -250,7 +250,7 @@ void JitIL::Init()
 	UpdateMemoryOptions();
 
 	trampolines.Init(jo.memcheck ? TRAMPOLINE_CODE_SIZE_MMU : TRAMPOLINE_CODE_SIZE);
-	AllocCodeSpace(CODE_SIZE);
+	AllocCodeSpace(CODE_SIZE, PPCSTATE_BASE);
 	blocks.Init();
 	asm_routines.Init(nullptr);
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
index 133350cd98..8476a6dbb5 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@@ -43,6 +43,18 @@
 // to address as much as possible in a one-byte offset form.
 #define RPPCSTATE RBP
 
+namespace Gen
+{
+
+inline OpArg MPIC(const void* address, X64Reg scale_reg, int scale = SCALE_1)
+{
+	ptrdiff_t offset = PPCSTATE_OFS(address);
+	_dbg_assert_(DYNA_REC, FitsInS32(offset));
+	return MComplex(RPPCSTATE, scale_reg, scale, offset);
+}
+
+}
+
 // Use these to control the instruction selection
 // #define INSTRUCTION_START FallBackToInterpreter(inst); return;
 // #define INSTRUCTION_START PPCTables::CountInstruction(inst);
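[Note] `MPIC` (presumably "memory, position-independent code") turns an absolute table address into an RPPCSTATE-relative operand; the debug assert relies on the executable's data sections and the hinted JIT regions all staying within rel32 range of `PPCSTATE_BASE`. A usage sketch, with an illustrative table and wrapper that are not part of the patch:

```cpp
static const u32 flag_table[16] = {}; // illustrative stand-in for m_flagTable

// Inside a Jit64 emit function, with 'idx' holding a table index:
void EmitFlagLookup(Jit64& jit, Gen::X64Reg dst, Gen::X64Reg idx)
{
	// Old form assumed the table's absolute address fit in 32 bits:
	//   jit.OR(32, Gen::R(dst), Gen::MScaled(idx, SCALE_4, (u32)(u64)flag_table));
	// New form encodes a displacement from RPPCSTATE, which always holds
	// PPCSTATE_BASE at runtime, i.e. roughly:
	//   or dst, dword [rbp + idx*4 + (flag_table - PPCSTATE_BASE)]
	jit.OR(32, Gen::R(dst), Gen::MPIC(flag_table, idx, SCALE_4));
}
```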
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
index e4a95b272e..404c673d20 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
@@ -173,11 +173,11 @@ private:
 		u32 all_ones = (1ULL << sbits) - 1;
 		if ((all_ones & mask) == all_ones)
 		{
-			MoveOpArgToReg(sbits, MDisp(RSCRATCH, 0));
+			MoveOpArgToReg(sbits, MatR(RSCRATCH));
 		}
 		else
 		{
-			m_code->MOVZX(32, sbits, m_dst_reg, MDisp(RSCRATCH, 0));
+			m_code->MOVZX(32, sbits, m_dst_reg, MatR(RSCRATCH));
 			m_code->AND(32, R(m_dst_reg), Imm32(mask));
 			if (m_sign_extend)
 				m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg));
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
index 21d19a88f6..5dc67d9f3c 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
@@ -9,15 +9,15 @@
 #include "Common/BitSet.h"
 #include "Common/CPUDetect.h"
 #include "Common/x64Emitter.h"
+#include "Core/PowerPC/PowerPC.h"
 
 namespace MMIO { class Mapping; }
 
 // We offset by 0x80 because the range of one byte memory offsets is
 // -0x80..0x7f.
-#define PPCSTATE(x) MDisp(RPPCSTATE, \
-	(int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80)
-// In case you want to disable the ppcstate register:
-// #define PPCSTATE(x) M(&PowerPC::ppcState.x)
+#define PPCSTATE_BASE ((u8*)&PowerPC::ppcState + 0x80)
+#define PPCSTATE_OFS(x) ((u8*)(x) - PPCSTATE_BASE)
+#define PPCSTATE(x) MDisp(RPPCSTATE, PPCSTATE_OFS(&PowerPC::ppcState.x))
 #define PPCSTATE_LR PPCSTATE(spr[SPR_LR])
 #define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR])
 #define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0])
@@ -31,7 +31,7 @@ private:
 	bool m_enabled = false;
 public:
 	bool Enabled() { return m_enabled; }
-	void Init(int size) { AllocCodeSpace(size); m_enabled = true; }
+	void Init(int size) { AllocCodeSpace(size, PPCSTATE_BASE); m_enabled = true; }
 	void Shutdown() { FreeCodeSpace(); m_enabled = false; }
 };
diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
index f9b7e219e2..0a02764142 100644
--- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
@@ -22,7 +22,7 @@ using namespace Gen;
 
 void TrampolineCache::Init(int size)
 {
-	AllocCodeSpace(size);
+	AllocCodeSpace(size, PPCSTATE_BASE);
 }
 
 void TrampolineCache::ClearCodeSpace()
diff --git a/Source/UnitTests/Common/x64EmitterTest.cpp b/Source/UnitTests/Common/x64EmitterTest.cpp
index 886380e198..adbdf76a77 100644
--- a/Source/UnitTests/Common/x64EmitterTest.cpp
+++ b/Source/UnitTests/Common/x64EmitterTest.cpp
@@ -19,6 +19,7 @@
 
 #include "Common/CPUDetect.h"
 #include "Common/x64Emitter.h"
+#include "Core/PowerPC/JitCommon/Jit_Util.h"
 
 namespace Gen
 {
@@ -94,7 +95,7 @@ protected:
 		memset(&cpu_info, 0xFF, sizeof (cpu_info));
 
 		emitter.reset(new X64CodeBlock());
-		emitter->AllocCodeSpace(4096);
+		emitter->AllocCodeSpace(4096, PPCSTATE_BASE);
 		code_buffer = emitter->GetWritableCodePtr();
 
 		disasm.reset(new disassembler);
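[Note] The `+ 0x80` bias in `PPCSTATE_BASE` exists because x86-64 disp8 displacements span -0x80..0x7f: biasing the base pointer by 0x80 lets the first 256 bytes of `ppcState` encode with a one-byte displacement instead of four. A small illustrative check (the struct layout here is a stand-in, not Dolphin's real `PowerPCState`):

```cpp
#include <cstdint>
#include <cstdio>

struct PowerPCState { uint32_t gpr[32]; uint32_t pc; /* ... */ };

int main()
{
	PowerPCState ppcState{};
	uint8_t* base = (uint8_t*)&ppcState + 0x80;          // PPCSTATE_BASE
	ptrdiff_t ofs = (uint8_t*)&ppcState.gpr[0] - base;   // PPCSTATE_OFS
	// -0x80 fits in a signed byte, so [rbp - 0x80] encodes with disp8:
	printf("gpr[0] displacement = %td (disp8? %s)\n", ofs,
	       (ofs >= -0x80 && ofs <= 0x7f) ? "yes" : "no");
	return 0;
}
```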