diff --git a/Source/Core/Common/Arm64Emitter.h b/Source/Core/Common/Arm64Emitter.h
index 19d63a8dae..bd2c4822d5 100644
--- a/Source/Core/Common/Arm64Emitter.h
+++ b/Source/Core/Common/Arm64Emitter.h
@@ -1009,12 +1009,20 @@ public:
   void MOVP2R(ARM64Reg Rd, P* ptr)
   {
     ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
-    MOVI2R(Rd, (uintptr_t)ptr);
+    MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr));
+  }
+  template <class P>
+  // Given an address, stores the page address into a register and returns the page-relative offset
+  s32 MOVPage2R(ARM64Reg Rd, P* ptr)
+  {
+    ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
+    MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr) & ~0xFFFULL);
+    return static_cast<s32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFULL);
   }
 
-  // Wrapper around AND x, y, imm etc.
-  // If you are sure the imm will work, preferably construct a LogicalImm directly instead,
-  // since that is constexpr and thus can be done at compile-time for constant values.
+  // Wrappers around bitwise operations with an immediate. If you're sure an imm can be encoded
+  // without a scratch register, preferably construct a LogicalImm directly instead,
+  // since that is constexpr and thus can be done at compile time for constant values.
   void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
   void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
   void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
@@ -1024,6 +1032,7 @@ public:
   void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
   void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
 
+  // Wrappers around arithmetic operations with an immediate.
   void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
                        ARM64Reg scratch);
   void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index a2aae7c2d3..4afd8ac92e 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -915,8 +915,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
       SetJumpTarget(exception);
       LDR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(msr));
       TBZ(ARM64Reg::W30, 15, done_here);  // MSR.EE
-      MOVP2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause);
-      LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30, 0);
+      LDR(IndexType::Unsigned, ARM64Reg::W30, ARM64Reg::X30,
+          MOVPage2R(ARM64Reg::X30, &ProcessorInterface::m_InterruptCause));
       constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
                                  ProcessorInterface::INT_CAUSE_PE_TOKEN |
                                  ProcessorInterface::INT_CAUSE_PE_FINISH;
@@ -951,8 +951,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
       SetJumpTarget(exception);
       LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(msr));
       TBZ(WA, 15, done_here);  // MSR.EE
-      MOVP2R(XA, &ProcessorInterface::m_InterruptCause);
-      LDR(IndexType::Unsigned, WA, XA, 0);
+      LDR(IndexType::Unsigned, WA, XA, MOVPage2R(XA, &ProcessorInterface::m_InterruptCause));
       constexpr u32 cause_mask = ProcessorInterface::INT_CAUSE_CP |
                                  ProcessorInterface::INT_CAUSE_PE_TOKEN |
                                  ProcessorInterface::INT_CAUSE_PE_FINISH;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index a1c292afdf..05d3a880d7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -42,10 +42,9 @@ void JitArm64::GenerateAsm()
 
   // Swap the stack pointer, so we have proper guard pages.
   ADD(ARM64Reg::X0, ARM64Reg::SP, 0);
-  MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
-  STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
-  MOVP2R(ARM64Reg::X1, &m_stack_pointer);
-  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
+  STR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
+      MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
+  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, MOVPage2R(ARM64Reg::X1, &m_stack_pointer));
   FixupBranch no_fake_stack = CBZ(ARM64Reg::X0);
   ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
   SetJumpTarget(no_fake_stack);
@@ -167,8 +166,7 @@ void JitArm64::GenerateAsm()
 
   // Check the state pointer to see if we are exiting
   // Gets checked on at the end of every slice
-  MOVP2R(ARM64Reg::X0, CPU::GetStatePtr());
-  LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, 0);
+  LDR(IndexType::Unsigned, ARM64Reg::W0, ARM64Reg::X0, MOVPage2R(ARM64Reg::X0, CPU::GetStatePtr()));
   CMP(ARM64Reg::W0, 0);
   FixupBranch Exit = B(CC_NEQ);
 
@@ -186,8 +184,8 @@ void JitArm64::GenerateAsm()
   SetJumpTarget(Exit);
 
   // Reset the stack pointer, as the BLR optimization have touched it.
-  MOVP2R(ARM64Reg::X1, &m_saved_stack_pointer);
-  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1, 0);
+  LDR(IndexType::Unsigned, ARM64Reg::X0, ARM64Reg::X1,
+      MOVPage2R(ARM64Reg::X1, &m_saved_stack_pointer));
   ADD(ARM64Reg::SP, ARM64Reg::X0, 0);
 
   m_float_emit.ABI_PopRegisters(regs_to_save_fpr, ARM64Reg::X30);
@@ -526,9 +524,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }
@@ -544,9 +542,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }
@@ -561,9 +559,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }
@@ -578,9 +576,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }
@@ -607,9 +605,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }
@@ -625,9 +623,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }
@@ -642,9 +640,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }
@@ -659,9 +657,9 @@ void JitArm64::GenerateQuantizedLoads()
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
 
-    MOVP2R(ARM64Reg::X2, &m_dequantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_dequantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     RET(ARM64Reg::X30);
   }
@@ -727,9 +725,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedU8 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
 
@@ -746,9 +744,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedS8 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
 
@@ -765,9 +763,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedU16 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
 
@@ -783,9 +781,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storePairedS16 = GetCodePtr();  // Used by Viewtiful Joe's intro movie
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1, 0);
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
 
@@ -812,9 +810,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleU8 = GetCodePtr();  // Used by MKWii
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
 
@@ -831,9 +829,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleS8 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
 
@@ -850,9 +848,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleU16 = GetCodePtr();  // Used by MKWii
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
     float_emit.FCVTZU(32, ARM64Reg::D0, ARM64Reg::D0);
 
@@ -868,9 +866,9 @@ void JitArm64::GenerateQuantizedStores()
   }
   const u8* storeSingleS16 = GetCodePtr();
   {
-    MOVP2R(ARM64Reg::X2, &m_quantizeTableS);
+    const s32 load_offset = MOVPage2R(ARM64Reg::X2, &m_quantizeTableS);
     ADD(scale_reg, ARM64Reg::X2, scale_reg, ArithOption(scale_reg, ShiftType::LSL, 3));
-    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, 0);
+    float_emit.LDR(32, IndexType::Unsigned, ARM64Reg::D1, scale_reg, load_offset);
     float_emit.FMUL(32, ARM64Reg::D0, ARM64Reg::D0, ARM64Reg::D1);
     float_emit.FCVTZS(32, ARM64Reg::D0, ARM64Reg::D0);
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp
index 1a76814162..1674687a5f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit_Util.cpp
@@ -34,18 +34,18 @@ public:
   }
 
 private:
-  void StoreFromRegister(int sbits, ARM64Reg reg)
+  void StoreFromRegister(int sbits, ARM64Reg reg, s32 offset)
   {
     switch (sbits)
     {
     case 8:
-      m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
+      m_emit->STRB(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
       break;
     case 16:
-      m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
+      m_emit->STRH(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
       break;
     case 32:
-      m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, 0);
+      m_emit->STR(IndexType::Unsigned, reg, ARM64Reg::X0, offset);
       break;
     default:
       ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOWriteCodeGenerator!", sbits);
@@ -55,20 +55,20 @@ private:
 
   void WriteRegToAddr(int sbits, const void* ptr, u32 mask)
   {
-    m_emit->MOVP2R(ARM64Reg::X0, ptr);
+    const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
 
     // If we do not need to mask, we can do the sign extend while loading
     // from memory. If masking is required, we have to first zero extend,
     // then mask, then sign extend if needed (1 instr vs. ~4).
-    u32 all_ones = (1ULL << sbits) - 1;
+    const u32 all_ones = (1ULL << sbits) - 1;
     if ((all_ones & mask) == all_ones)
     {
-      StoreFromRegister(sbits, m_src_reg);
+      StoreFromRegister(sbits, m_src_reg, offset);
     }
     else
     {
       m_emit->ANDI2R(ARM64Reg::W1, m_src_reg, mask, ARM64Reg::W1);
-      StoreFromRegister(sbits, ARM64Reg::W1);
+      StoreFromRegister(sbits, ARM64Reg::W1, offset);
     }
   }
 
@@ -123,24 +123,24 @@ private:
     m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
   }
 
-  void LoadToRegister(int sbits, bool dont_extend)
+  void LoadToRegister(int sbits, bool dont_extend, s32 offset)
   {
     switch (sbits)
     {
     case 8:
       if (m_sign_extend && !dont_extend)
-        m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRSB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       else
-        m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRB(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       break;
     case 16:
       if (m_sign_extend && !dont_extend)
-        m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRSH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       else
-        m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+        m_emit->LDRH(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
       break;
     case 32:
-      m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, 0);
+      m_emit->LDR(IndexType::Unsigned, m_dst_reg, ARM64Reg::X0, offset);
      break;
    default:
       ASSERT_MSG(DYNA_REC, false, "Unknown size {} passed to MMIOReadCodeGenerator!", sbits);
@@ -150,19 +150,19 @@ private:
 
   void LoadAddrMaskToReg(int sbits, const void* ptr, u32 mask)
   {
-    m_emit->MOVP2R(ARM64Reg::X0, ptr);
+    const s32 offset = m_emit->MOVPage2R(ARM64Reg::X0, ptr);
 
     // If we do not need to mask, we can do the sign extend while loading
    // from memory. If masking is required, we have to first zero extend,
     // then mask, then sign extend if needed (1 instr vs. ~4).
-    u32 all_ones = (1ULL << sbits) - 1;
+    const u32 all_ones = (1ULL << sbits) - 1;
     if ((all_ones & mask) == all_ones)
     {
-      LoadToRegister(sbits, false);
+      LoadToRegister(sbits, false, offset);
     }
     else
     {
-      LoadToRegister(sbits, true);
+      LoadToRegister(sbits, true, offset);
       m_emit->ANDI2R(m_dst_reg, m_dst_reg, mask, ARM64Reg::W0);
       if (m_sign_extend)
         m_emit->SBFM(m_dst_reg, m_dst_reg, 0, sbits - 1);
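
For context on the pattern this patch introduces: MOVP2R materializes the full pointer and the following LDR/STR uses an immediate offset of 0, whereas MOVPage2R only materializes the 4 KiB-aligned page address and returns the low 12 bits so the caller can fold them into the load/store's unsigned-immediate field. A page-aligned constant is cheaper to build (on AArch64 it can often come from a single ADRP when the target is within PC-relative range), so each of these accesses typically saves an instruction. Below is a minimal standalone sketch of the address split itself; SplitPageOffset and interrupt_cause are hypothetical names used only for illustration, not part of the Dolphin code.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-in mirroring the arithmetic in MOVPage2R: the page base
// (low 12 bits cleared) is what MOVI2R would materialize into the register,
// and the returned remainder is what the LDR/STR encodes as its unsigned offset.
static std::int32_t SplitPageOffset(const void* ptr, std::uint64_t* page_base)
{
  const std::uint64_t addr = reinterpret_cast<std::uint64_t>(ptr);
  *page_base = addr & ~0xFFFULL;
  return static_cast<std::int32_t>(addr & 0xFFFULL);
}

int main()
{
  static std::uint32_t interrupt_cause = 0;  // stand-in for a global the JIT polls
  std::uint64_t page_base = 0;
  const std::int32_t offset = SplitPageOffset(&interrupt_cause, &page_base);

  // The two halves always recombine to the original address.
  assert(page_base + static_cast<std::uint64_t>(offset) ==
         reinterpret_cast<std::uint64_t>(&interrupt_cause));
  std::printf("page base = %#llx, offset = %d\n",
              static_cast<unsigned long long>(page_base), offset);
}
```

One caveat worth noting: an AArch64 unsigned-offset LDR/STR scales its immediate by the access size, so the returned byte offset must be a multiple of that size. That should hold for naturally aligned globals like the ones touched in this change.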