diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index c44fe22825..7725b63fd8 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -755,6 +755,10 @@ void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) Write8(0x0F); Write8(0xB7); } + else if (sbits == 32 && dbits == 64) + { + Write8(0x8B); + } else { Crash(); @@ -1055,10 +1059,8 @@ void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(t void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2) { #ifdef _DEBUG -#ifndef _M_X64 _assert_msg_(DYNA_REC, !a1.IsSimpleReg() || !a2.IsSimpleReg() || a1.GetSimpleReg() != a2.GetSimpleReg(), "Redundant MOV @ %p - bug in JIT?", - code); -#endif + code); #endif WriteNormalOp(this, bits, nrmMOV, a1, a2); } diff --git a/Source/Core/Core/Src/DSP/DSPEmitter.h b/Source/Core/Core/Src/DSP/DSPEmitter.h index a1be7c3ac6..acf0ab0fd2 100644 --- a/Source/Core/Core/Src/DSP/DSPEmitter.h +++ b/Source/Core/Core/Src/DSP/DSPEmitter.h @@ -107,8 +107,8 @@ public: void dsp_op_write_reg_imm(int reg, u16 val); void dsp_conditional_extend_accum(int reg); void dsp_conditional_extend_accum_imm(int reg, u16 val); + void dsp_op_read_reg_dont_saturate(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend = NONE); void dsp_op_read_reg(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend = NONE); - void dsp_op_read_reg_and_saturate(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend = NONE); // Commands void dar(const UDSPInstruction opc); diff --git a/Source/Core/Core/Src/DSP/DspIntArithmetic.cpp b/Source/Core/Core/Src/DSP/DspIntArithmetic.cpp index a88028dd78..4d2b5be3ac 100644 --- a/Source/Core/Core/Src/DSP/DspIntArithmetic.cpp +++ b/Source/Core/Core/Src/DSP/DspIntArithmetic.cpp @@ -451,7 +451,7 @@ void addp(const UDSPInstruction opc) dsp_set_long_acc(dreg, res); res = dsp_get_long_acc(dreg); - Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, prod, res)); + Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, prod, res)); } // ADDAXL $acD, $axS.l diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp index 8bff5c0c47..04cc3aa753 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitArithmetic.cpp @@ -737,12 +737,12 @@ void DSPEmitter::addp(const UDSPInstruction opc) ADD(64, R(RAX), R(RDX)); // dsp_set_long_acc(dreg, res); // res = dsp_get_long_acc(dreg); -// Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, prod, res)); +// Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, prod, res)); if (FlagsNeeded()) { MOV(64, R(RCX), R(RAX)); set_long_acc(dreg, RCX); - Update_SR_Register64_Carry2(EAX, tmp1); + Update_SR_Register64_Carry(EAX, tmp1); } else { @@ -1557,16 +1557,17 @@ void DSPEmitter::lsrn(const UDSPInstruction opc) // acc <<= -shift; // } - CMP(64, R(RDX), Imm8(0)); + CMP(64, R(RDX), Imm8(0));//is this actually worth the branch cost? FixupBranch zero = J_CC(CC_E); - TEST(16, R(RAX), Imm16(0x3f)); + TEST(16, R(RAX), Imm16(0x3f));//is this actually worth the branch cost? FixupBranch noShift = J_CC(CC_Z); - MOVZX(64, 16, RCX, R(RAX)); - AND(16, R(RCX), Imm16(0x3f)); +//CL gets automatically masked with 0x3f on IA32/AMD64 + //MOVZX(64, 16, RCX, R(RAX)); + //AND(16, R(RCX), Imm16(0x3f)); TEST(16, R(RAX), Imm16(0x40)); FixupBranch shiftLeft = J_CC(CC_Z); NEG(16, R(RCX)); - ADD(16, R(RCX), Imm16(0x40)); + //ADD(16, R(RCX), Imm16(0x40)); SHL(64, R(RDX), R(RCX)); FixupBranch exit = J(); SetJumpTarget(shiftLeft); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp index d595b373af..70bb3618e6 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitBranch.cpp @@ -322,7 +322,8 @@ void DSPEmitter::loop(const UDSPInstruction opc) { u16 reg = opc & 0x1f; // u16 cnt = g_dsp.r[reg]; - dsp_op_read_reg(reg, RDX, ZERO); +//todo: check if we can use normal variant here + dsp_op_read_reg_dont_saturate(reg, RDX, ZERO); u16 loop_pc = compilePC + 1; CMP(16, R(EDX), Imm16(0)); @@ -391,7 +392,8 @@ void DSPEmitter::bloop(const UDSPInstruction opc) { u16 reg = opc & 0x1f; // u16 cnt = g_dsp.r[reg]; - dsp_op_read_reg(reg, RDX, ZERO); +//todo: check if we can use normal variant here + dsp_op_read_reg_dont_saturate(reg, RDX, ZERO); u16 loop_pc = dsp_imem_read(compilePC + 1); CMP(16, R(EDX), Imm16(0)); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp index 5bcd13d071..149387db6d 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitExtOps.cpp @@ -59,11 +59,8 @@ void DSPEmitter::mv(const UDSPInstruction opc) { u8 sreg = (opc & 0x3) + DSP_REG_ACL0; u8 dreg = ((opc >> 2) & 0x3); - if (sreg >= DSP_REG_ACM0) { - dsp_op_read_reg_and_saturate(sreg, RBX, ZERO); - storeIndex = dreg + DSP_REG_AXL0; - } else - pushExtValueFromReg(dreg + DSP_REG_AXL0, sreg); + dsp_op_read_reg(sreg, RBX, ZERO); + storeIndex = dreg + DSP_REG_AXL0; } // S @$arD, $acS.S @@ -80,10 +77,7 @@ void DSPEmitter::s(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - if (sreg >= DSP_REG_ACM0) - dsp_op_read_reg_and_saturate(sreg, tmp1, ZERO); - else - dsp_op_read_reg(sreg, tmp1, ZERO); + dsp_op_read_reg(sreg, tmp1, ZERO); // u16 val = g_dsp.r[src]; dmem_write(tmp1); @@ -105,10 +99,7 @@ void DSPEmitter::sn(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - if (sreg >= DSP_REG_ACM0) - dsp_op_read_reg_and_saturate(sreg, tmp1, ZERO); - else - dsp_op_read_reg(sreg, tmp1, ZERO); + dsp_op_read_reg(sreg, tmp1, ZERO); dmem_write(tmp1); gpr.putXReg(tmp1); @@ -178,7 +169,7 @@ void DSPEmitter::ls(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO); + dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO); dmem_write(tmp1); gpr.putXReg(tmp1); @@ -205,7 +196,7 @@ void DSPEmitter::lsn(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO); + dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO); dmem_write(tmp1); gpr.putXReg(tmp1); @@ -231,7 +222,7 @@ void DSPEmitter::lsm(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO); + dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO); dmem_write(tmp1); gpr.putXReg(tmp1); @@ -258,7 +249,7 @@ void DSPEmitter::lsnm(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO); + dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO); dmem_write(tmp1); gpr.putXReg(tmp1); @@ -283,7 +274,7 @@ void DSPEmitter::sl(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO); + dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO); dmem_write(tmp1); gpr.putXReg(tmp1); @@ -309,7 +300,7 @@ void DSPEmitter::sln(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO); + dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO); dmem_write(tmp1); gpr.putXReg(tmp1); @@ -335,7 +326,7 @@ void DSPEmitter::slm(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO); + dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO); dmem_write(tmp1); gpr.putXReg(tmp1); @@ -361,7 +352,7 @@ void DSPEmitter::slnm(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - dsp_op_read_reg_and_saturate(sreg + DSP_REG_ACM0, tmp1, ZERO); + dsp_op_read_reg(sreg + DSP_REG_ACM0, tmp1, ZERO); dmem_write(tmp1); gpr.putXReg(tmp1); @@ -640,15 +631,8 @@ void DSPEmitter::ldaxnm(const UDSPInstruction opc) increase_addr_reg(DSP_REG_AR3, DSP_REG_AR3); } - -// Push value from g_dsp.r[sreg] into EBX and stores the destinationindex in -// storeIndex -void DSPEmitter::pushExtValueFromReg(u16 dreg, u16 sreg) -{ - dsp_op_read_reg(sreg, RBX, ZERO); - storeIndex = dreg; -} - +// Push value from address in g_dsp.r[sreg] into EBX and stores the +// destinationindex in storeIndex void DSPEmitter::pushExtValueFromMem(u16 dreg, u16 sreg) { // u16 addr = g_dsp.r[addr]; diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp index 9f6f6f99e5..9197659aaf 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitLoadStore.cpp @@ -26,10 +26,7 @@ void DSPEmitter::srs(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - if (reg >= DSP_REG_ACM0) - dsp_op_read_reg_and_saturate(reg, tmp1, ZERO); - else - dsp_op_read_reg(reg, tmp1, ZERO); + dsp_op_read_reg(reg, tmp1, ZERO); dsp_op_read_reg(DSP_REG_CR, RAX, ZERO); SHL(16, R(EAX), Imm8(8)); OR(16, R(EAX), Imm16(opc & 0xFF)); @@ -87,10 +84,7 @@ void DSPEmitter::sr(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - if (reg >= DSP_REG_ACM0) - dsp_op_read_reg_and_saturate(reg, tmp1); - else - dsp_op_read_reg(reg, tmp1); + dsp_op_read_reg(reg, tmp1); dmem_write_imm(address, tmp1); gpr.putXReg(tmp1); @@ -213,10 +207,7 @@ void DSPEmitter::srr(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - if (sreg >= DSP_REG_ACM0) - dsp_op_read_reg_and_saturate(sreg, tmp1); - else - dsp_op_read_reg(sreg, tmp1); + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); dmem_write(tmp1); @@ -235,10 +226,7 @@ void DSPEmitter::srrd(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - if (sreg >= DSP_REG_ACM0) - dsp_op_read_reg_and_saturate(sreg, tmp1); - else - dsp_op_read_reg(sreg, tmp1); + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); dmem_write(tmp1); @@ -259,10 +247,7 @@ void DSPEmitter::srri(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - if (sreg >= DSP_REG_ACM0) - dsp_op_read_reg_and_saturate(sreg, tmp1); - else - dsp_op_read_reg(sreg, tmp1); + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); dmem_write(tmp1); @@ -283,10 +268,7 @@ void DSPEmitter::srrn(const UDSPInstruction opc) X64Reg tmp1; gpr.getFreeXReg(tmp1); - if (sreg >= DSP_REG_ACM0) - dsp_op_read_reg_and_saturate(sreg, tmp1); - else - dsp_op_read_reg(sreg, tmp1); + dsp_op_read_reg(sreg, tmp1); dsp_op_read_reg(dreg, RAX, ZERO); dmem_write(tmp1); diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp index 7c48867234..09b87be54a 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitMisc.cpp @@ -9,310 +9,6 @@ #include "x64ABI.h" using namespace Gen; -//clobbers: -//EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] -//expects: -void DSPEmitter::dsp_reg_stack_push(int stack_reg) -{ - //g_dsp.reg_stack_ptr[stack_reg]++; - //g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK; - MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg])); - ADD(8, R(AL), Imm8(1)); - AND(8, R(AL), Imm8(DSP_STACK_MASK)); - MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL)); - - X64Reg tmp1; - gpr.getFreeXReg(tmp1); - //g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg]; - MOV(16, R(tmp1), M(&g_dsp.r.st[stack_reg])); -#ifdef _M_IX86 // All32 - MOVZX(32, 8, EAX, R(AL)); -#else - MOVZX(64, 8, RAX, R(AL)); -#endif - MOV(16, MComplex(EAX, EAX, 1, - PtrOffset(&g_dsp.reg_stack[stack_reg][0],0)), R(tmp1)); - gpr.putXReg(tmp1); -} - -//clobbers: -//EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] -//expects: -void DSPEmitter::dsp_reg_stack_pop(int stack_reg) -{ - //g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]]; - MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg])); - X64Reg tmp1; - gpr.getFreeXReg(tmp1); -#ifdef _M_IX86 // All32 - MOVZX(32, 8, EAX, R(AL)); -#else - MOVZX(64, 8, RAX, R(AL)); -#endif - MOV(16, R(tmp1), MComplex(EAX, EAX, 1, - PtrOffset(&g_dsp.reg_stack[stack_reg][0],0))); - MOV(16, M(&g_dsp.r.st[stack_reg]), R(tmp1)); - gpr.putXReg(tmp1); - - //g_dsp.reg_stack_ptr[stack_reg]--; - //g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK; - SUB(8, R(AL), Imm8(1)); - AND(8, R(AL), Imm8(DSP_STACK_MASK)); - MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL)); -} - - -void DSPEmitter::dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg) -{ - if (host_sreg != EDX) { - MOV(16, R(EDX), R(host_sreg)); - } - dsp_reg_stack_push(stack_reg); - //g_dsp.r[DSP_REG_ST0 + stack_reg] = val; - MOV(16, M(&g_dsp.r.st[stack_reg]), R(EDX)); -} - -void DSPEmitter::dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg) -{ - //u16 val = g_dsp.r[DSP_REG_ST0 + stack_reg]; - MOV(16, R(EDX), M(&g_dsp.r.st[stack_reg])); - dsp_reg_stack_pop(stack_reg); - if (host_dreg != EDX) { - MOV(16, R(host_dreg), R(EDX)); - } -} - -void DSPEmitter::dsp_reg_store_stack_imm(int stack_reg, u16 val) -{ - dsp_reg_stack_push(stack_reg); - //g_dsp.r[DSP_REG_ST0 + stack_reg] = val; - MOV(16, M(&g_dsp.r.st[stack_reg]), Imm16(val)); -} - -void DSPEmitter::dsp_op_write_reg(int reg, Gen::X64Reg host_sreg) -{ - switch (reg & 0x1f) { - // 8-bit sign extended registers. - case DSP_REG_ACH0: - case DSP_REG_ACH1: - gpr.writeReg(reg, R(host_sreg)); - break; - - // Stack registers. - case DSP_REG_ST0: - case DSP_REG_ST1: - case DSP_REG_ST2: - case DSP_REG_ST3: - dsp_reg_store_stack(reg - DSP_REG_ST0, host_sreg); - break; - - default: - gpr.writeReg(reg, R(host_sreg)); - break; - } -} - -void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val) -{ - switch (reg & 0x1f) { - // 8-bit sign extended registers. Should look at prod.h too... - case DSP_REG_ACH0: - case DSP_REG_ACH1: - gpr.writeReg(reg, Imm16((u16)(s16)(s8)(u8)val)); - break; - // Stack registers. - case DSP_REG_ST0: - case DSP_REG_ST1: - case DSP_REG_ST2: - case DSP_REG_ST3: - dsp_reg_store_stack_imm(reg - DSP_REG_ST0, val); - break; - - default: - gpr.writeReg(reg, Imm16(val)); - break; - } -} - -void DSPEmitter::dsp_conditional_extend_accum(int reg) -{ - switch (reg) - { - case DSP_REG_ACM0: - case DSP_REG_ACM1: - { - OpArg sr_reg; - gpr.getReg(DSP_REG_SR,sr_reg); - DSPJitRegCache c(gpr); - TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); - FixupBranch not_40bit = J_CC(CC_Z,true); - //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) - //{ - // Sign extend into whole accum. - //u16 val = g_dsp.r[reg]; - get_acc_m(reg - DSP_REG_ACM0, EAX); - SHR(32, R(EAX), Imm8(16)); - //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 0xFFFF : 0x0000; - //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0; - set_acc_h(reg - DSP_REG_ACM0, R(RAX)); - set_acc_l(reg - DSP_REG_ACM0, Imm16(0)); - //} - gpr.flushRegs(c); - SetJumpTarget(not_40bit); - gpr.putReg(DSP_REG_SR, false); - } - } -} - -void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val) -{ - switch (reg) - { - case DSP_REG_ACM0: - case DSP_REG_ACM1: - { - OpArg sr_reg; - gpr.getReg(DSP_REG_SR,sr_reg); - DSPJitRegCache c(gpr); - TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); - FixupBranch not_40bit = J_CC(CC_Z, true); - //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) - //{ - // Sign extend into whole accum. - //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 0xFFFF : 0x0000; - //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0; - set_acc_h(reg - DSP_REG_ACM0, Imm16((val & 0x8000)?0xffff:0x0000)); - set_acc_l(reg - DSP_REG_ACM0, Imm16(0)); - //} - gpr.flushRegs(c); - SetJumpTarget(not_40bit); - gpr.putReg(DSP_REG_SR, false); - } - } -} - -void DSPEmitter::dsp_op_read_reg(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend) -{ - switch (reg & 0x1f) - { - case DSP_REG_ST0: - case DSP_REG_ST1: - case DSP_REG_ST2: - case DSP_REG_ST3: - dsp_reg_load_stack(reg - DSP_REG_ST0, host_dreg); - switch(extend) { - case SIGN: -#ifdef _M_IX86 // All32 - MOVSX(32, 16, host_dreg, R(host_dreg)); -#else - MOVSX(64, 16, host_dreg, R(host_dreg)); -#endif - break; - case ZERO: -#ifdef _M_IX86 // All32 - MOVZX(32, 16, host_dreg, R(host_dreg)); -#else - MOVZX(64, 16, host_dreg, R(host_dreg)); -#endif - break; - case NONE: - default: - break; - } - return; - default: - gpr.readReg(reg, host_dreg, extend); - return; - } -} - -void DSPEmitter::dsp_op_read_reg_and_saturate(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend) -{ - //we already know this is ACCM0 or ACCM1 -#ifdef _M_IX86 // All32 - gpr.readReg(reg, host_dreg, extend); -#else - OpArg acc_reg; - gpr.getReg(reg-DSP_REG_ACM0+DSP_REG_ACC0_64, acc_reg); -#endif - OpArg sr_reg; - gpr.getReg(DSP_REG_SR,sr_reg); - - DSPJitRegCache c(gpr); - TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); - FixupBranch not_40bit = J_CC(CC_Z, true); - -#ifdef _M_IX86 // All32 - DSPJitRegCache c2(gpr); - gpr.putReg(DSP_REG_SR, false); - X64Reg tmp1; - gpr.getFreeXReg(tmp1); - gpr.readReg(reg-DSP_REG_ACM0+DSP_REG_ACH0, tmp1, NONE); - MOVSX(32,16,host_dreg,R(host_dreg)); - SHL(32, R(tmp1), Imm8(16)); - MOV(16,R(tmp1),R(host_dreg)); - CMP(32,R(host_dreg), R(tmp1)); - - FixupBranch no_saturate = J_CC(CC_Z); - - CMP(32,R(tmp1),Imm32(0)); - FixupBranch negative = J_CC(CC_LE); - - MOV(32,R(host_dreg),Imm32(0x7fff));//this works for all extend modes - FixupBranch done_positive = J(); - - SetJumpTarget(negative); - if (extend == NONE || extend == ZERO) - MOV(32,R(host_dreg),Imm32(0x00008000)); - else - MOV(32,R(host_dreg),Imm32(0xffff8000)); - FixupBranch done_negative = J(); - - SetJumpTarget(no_saturate); - if (extend == ZERO) - MOVZX(32,16,host_dreg,R(host_dreg)); - SetJumpTarget(done_positive); - SetJumpTarget(done_negative); - gpr.putXReg(tmp1); - gpr.flushRegs(c2); - SetJumpTarget(not_40bit); - gpr.flushRegs(c); -#else - - MOVSX(64,32,host_dreg,acc_reg); - CMP(64,R(host_dreg),acc_reg); - FixupBranch no_saturate = J_CC(CC_Z); - - CMP(64,acc_reg,Imm32(0)); - FixupBranch negative = J_CC(CC_LE); - - MOV(64,R(host_dreg),Imm32(0x7fff));//this works for all extend modes - FixupBranch done_positive = J(); - - SetJumpTarget(negative); - if (extend == NONE || extend == ZERO) - MOV(64,R(host_dreg),Imm32(0x00008000)); - else - MOV(64,R(host_dreg),Imm32(0xffff8000)); - FixupBranch done_negative = J(); - - SetJumpTarget(no_saturate); - SetJumpTarget(not_40bit); - - MOV(64, R(host_dreg), acc_reg); - if (extend == NONE || extend == ZERO) - SHR(64, R(host_dreg), Imm8(16)); - else - SAR(64, R(host_dreg), Imm8(16)); - SetJumpTarget(done_positive); - SetJumpTarget(done_negative); - gpr.flushRegs(c); - gpr.putReg(reg-DSP_REG_ACM0+DSP_REG_ACC0_64, false); -#endif - - gpr.putReg(DSP_REG_SR, false); -} - // MRR $D, $S // 0001 11dd ddds ssss // Move value from register $S to register $D. @@ -321,10 +17,7 @@ void DSPEmitter::mrr(const UDSPInstruction opc) u8 sreg = opc & 0x1f; u8 dreg = (opc >> 5) & 0x1f; - if (sreg >= DSP_REG_ACM0) - dsp_op_read_reg_and_saturate(sreg, EDX); - else - dsp_op_read_reg(sreg, EDX); + dsp_op_read_reg(sreg, EDX); dsp_op_write_reg(dreg, EDX); dsp_conditional_extend_accum(dreg); } diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp index 652e7817df..1a693ede34 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitRegCache.cpp @@ -255,6 +255,18 @@ void DSPJitRegCache::flushRegs(DSPJitRegCache &cache, bool emit) regs[i].last_use_ctr = cache.regs[i].last_use_ctr; } + //sync the freely used xregs + if (!emit) { + for(i = 0; i < NUMXREGS; i++) { + if (cache.xregs[i].guest_reg == DSP_REG_USED && + xregs[i].guest_reg == DSP_REG_NONE) + xregs[i].guest_reg = DSP_REG_USED; + if (cache.xregs[i].guest_reg == DSP_REG_NONE && + xregs[i].guest_reg == DSP_REG_USED) + xregs[i].guest_reg = DSP_REG_NONE; + } + } + //consistency checks for(i = 0; i < NUMXREGS; i++) { @@ -389,7 +401,7 @@ void DSPJitRegCache::loadRegs(bool emit) for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { if (regs[i].host_reg != INVALID_REG) - movToHostReg(i,regs[i].host_reg); + movToHostReg(i,regs[i].host_reg, emit); } if (emit) @@ -519,7 +531,7 @@ void DSPJitRegCache::popRegs() { for(unsigned int i = 0; i <= DSP_REG_MAX_MEM_BACKED; i++) { if (regs[i].host_reg != INVALID_REG) - movToHostReg(i,regs[i].host_reg); + movToHostReg(i,regs[i].host_reg, true); } } @@ -916,17 +928,38 @@ void DSPJitRegCache::writeReg(int dreg, OpArg arg) { OpArg reg; getReg(dreg, reg, false); - - switch(regs[dreg].size) + if (arg.IsImm()) { - case 2: emitter.MOV(16, reg, arg); break; - case 4: emitter.MOV(32, reg, arg); break; + switch(regs[dreg].size) + { + case 2: emitter.MOV(16, reg, Imm16(arg.offset)); break; + case 4: emitter.MOV(32, reg, Imm32(arg.offset)); break; #ifdef _M_X64 - case 8: emitter.MOV(64, reg, arg); break; + case 8: + if ((s32)arg.offset == (s64)arg.offset) + emitter.MOV(64, reg, Imm32(arg.offset)); + else + emitter.MOV(64, reg, Imm64(arg.offset)); + break; #endif - default: - _assert_msg_(DSPLLE, 0, "unsupported memory size"); - break; + default: + _assert_msg_(DSPLLE, 0, "unsupported memory size"); + break; + } + } + else + { + switch(regs[dreg].size) + { + case 2: emitter.MOV(16, reg, arg); break; + case 4: emitter.MOV(32, reg, arg); break; +#ifdef _M_X64 + case 8: emitter.MOV(64, reg, arg); break; +#endif + default: + _assert_msg_(DSPLLE, 0, "unsupported memory size"); + break; + } } putReg(dreg, true); } @@ -1042,8 +1075,7 @@ void DSPJitRegCache::getXReg(X64Reg reg) if (xregs[reg].guest_reg != DSP_REG_NONE) spillXReg(reg); - - _assert_msg_(DSPLLE, xregs[reg].guest_reg != DSP_REG_NONE, "register already in use"); + _assert_msg_(DSPLLE, xregs[reg].guest_reg == DSP_REG_NONE, "register already in use"); xregs[reg].guest_reg = DSP_REG_USED; } diff --git a/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp b/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp index 641207df5d..e313920361 100644 --- a/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp +++ b/Source/Core/Core/Src/DSP/Jit/DSPJitUtil.cpp @@ -11,6 +11,352 @@ using namespace Gen; +//clobbers: +//EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] +//expects: +void DSPEmitter::dsp_reg_stack_push(int stack_reg) +{ + //g_dsp.reg_stack_ptr[stack_reg]++; + //g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK; + MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg])); + ADD(8, R(AL), Imm8(1)); + AND(8, R(AL), Imm8(DSP_STACK_MASK)); + MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL)); + + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + //g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg]; + MOV(16, R(tmp1), M(&g_dsp.r.st[stack_reg])); +#ifdef _M_IX86 // All32 + MOVZX(32, 8, EAX, R(AL)); +#else + MOVZX(64, 8, RAX, R(AL)); +#endif + MOV(16, MComplex(EAX, EAX, 1, + PtrOffset(&g_dsp.reg_stack[stack_reg][0],0)), R(tmp1)); + gpr.putXReg(tmp1); +} + +//clobbers: +//EAX = (s8)g_dsp.reg_stack_ptr[stack_reg] +//expects: +void DSPEmitter::dsp_reg_stack_pop(int stack_reg) +{ + //g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]]; + MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg])); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); +#ifdef _M_IX86 // All32 + MOVZX(32, 8, EAX, R(AL)); +#else + MOVZX(64, 8, RAX, R(AL)); +#endif + MOV(16, R(tmp1), MComplex(EAX, EAX, 1, + PtrOffset(&g_dsp.reg_stack[stack_reg][0],0))); + MOV(16, M(&g_dsp.r.st[stack_reg]), R(tmp1)); + gpr.putXReg(tmp1); + + //g_dsp.reg_stack_ptr[stack_reg]--; + //g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK; + SUB(8, R(AL), Imm8(1)); + AND(8, R(AL), Imm8(DSP_STACK_MASK)); + MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL)); +} + + +void DSPEmitter::dsp_reg_store_stack(int stack_reg, Gen::X64Reg host_sreg) +{ + if (host_sreg != EDX) + { + MOV(16, R(EDX), R(host_sreg)); + } + dsp_reg_stack_push(stack_reg); + //g_dsp.r[DSP_REG_ST0 + stack_reg] = val; + MOV(16, M(&g_dsp.r.st[stack_reg]), R(EDX)); +} + +void DSPEmitter::dsp_reg_load_stack(int stack_reg, Gen::X64Reg host_dreg) +{ + //u16 val = g_dsp.r[DSP_REG_ST0 + stack_reg]; + MOV(16, R(EDX), M(&g_dsp.r.st[stack_reg])); + dsp_reg_stack_pop(stack_reg); + if (host_dreg != EDX) + { + MOV(16, R(host_dreg), R(EDX)); + } +} + +void DSPEmitter::dsp_reg_store_stack_imm(int stack_reg, u16 val) +{ + dsp_reg_stack_push(stack_reg); + //g_dsp.r[DSP_REG_ST0 + stack_reg] = val; + MOV(16, M(&g_dsp.r.st[stack_reg]), Imm16(val)); +} + +void DSPEmitter::dsp_op_write_reg(int reg, Gen::X64Reg host_sreg) +{ + switch (reg & 0x1f) + { + // 8-bit sign extended registers. + case DSP_REG_ACH0: + case DSP_REG_ACH1: + gpr.writeReg(reg, R(host_sreg)); + break; + + // Stack registers. + case DSP_REG_ST0: + case DSP_REG_ST1: + case DSP_REG_ST2: + case DSP_REG_ST3: + dsp_reg_store_stack(reg - DSP_REG_ST0, host_sreg); + break; + + default: + gpr.writeReg(reg, R(host_sreg)); + break; + } +} + +void DSPEmitter::dsp_op_write_reg_imm(int reg, u16 val) +{ + switch (reg & 0x1f) + { + // 8-bit sign extended registers. Should look at prod.h too... + case DSP_REG_ACH0: + case DSP_REG_ACH1: + gpr.writeReg(reg, Imm16((u16)(s16)(s8)(u8)val)); + break; + // Stack registers. + case DSP_REG_ST0: + case DSP_REG_ST1: + case DSP_REG_ST2: + case DSP_REG_ST3: + dsp_reg_store_stack_imm(reg - DSP_REG_ST0, val); + break; + + default: + gpr.writeReg(reg, Imm16(val)); + break; + } +} + +void DSPEmitter::dsp_conditional_extend_accum(int reg) +{ + switch (reg) + { + case DSP_REG_ACM0: + case DSP_REG_ACM1: + { + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + DSPJitRegCache c(gpr); + TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); + FixupBranch not_40bit = J_CC(CC_Z,true); + //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) + //{ + // Sign extend into whole accum. + //u16 val = g_dsp.r[reg]; + get_acc_m(reg - DSP_REG_ACM0, EAX); + SHR(32, R(EAX), Imm8(16)); + //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 0xFFFF : 0x0000; + //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0; + set_acc_h(reg - DSP_REG_ACM0, R(RAX)); + set_acc_l(reg - DSP_REG_ACM0, Imm16(0)); + //} + gpr.flushRegs(c); + SetJumpTarget(not_40bit); + gpr.putReg(DSP_REG_SR, false); + } + } +} + +void DSPEmitter::dsp_conditional_extend_accum_imm(int reg, u16 val) +{ + switch (reg) + { + case DSP_REG_ACM0: + case DSP_REG_ACM1: + { + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + DSPJitRegCache c(gpr); + TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); + FixupBranch not_40bit = J_CC(CC_Z, true); + //if (g_dsp.r[DSP_REG_SR] & SR_40_MODE_BIT) + //{ + // Sign extend into whole accum. + //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = (val & 0x8000) ? 0xFFFF : 0x0000; + //g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0; + set_acc_h(reg - DSP_REG_ACM0, Imm16((val & 0x8000)?0xffff:0x0000)); + set_acc_l(reg - DSP_REG_ACM0, Imm16(0)); + //} + gpr.flushRegs(c); + SetJumpTarget(not_40bit); + gpr.putReg(DSP_REG_SR, false); + } + } +} + +void DSPEmitter::dsp_op_read_reg_dont_saturate(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend) +{ + switch (reg & 0x1f) + { + case DSP_REG_ST0: + case DSP_REG_ST1: + case DSP_REG_ST2: + case DSP_REG_ST3: + dsp_reg_load_stack(reg - DSP_REG_ST0, host_dreg); + switch(extend) + { + case SIGN: +#ifdef _M_IX86 // All32 + MOVSX(32, 16, host_dreg, R(host_dreg)); +#else + MOVSX(64, 16, host_dreg, R(host_dreg)); +#endif + break; + case ZERO: +#ifdef _M_IX86 // All32 + MOVZX(32, 16, host_dreg, R(host_dreg)); +#else + MOVZX(64, 16, host_dreg, R(host_dreg)); +#endif + break; + case NONE: + default: + break; + } + return; + default: + gpr.readReg(reg, host_dreg, extend); + return; + } +} + +void DSPEmitter::dsp_op_read_reg(int reg, Gen::X64Reg host_dreg, DSPJitSignExtend extend) +{ + switch (reg & 0x1f) + { + case DSP_REG_ST0: + case DSP_REG_ST1: + case DSP_REG_ST2: + case DSP_REG_ST3: + dsp_reg_load_stack(reg - DSP_REG_ST0, host_dreg); + switch(extend) + { + case SIGN: +#ifdef _M_IX86 // All32 + MOVSX(32, 16, host_dreg, R(host_dreg)); +#else + MOVSX(64, 16, host_dreg, R(host_dreg)); +#endif + break; + case ZERO: +#ifdef _M_IX86 // All32 + MOVZX(32, 16, host_dreg, R(host_dreg)); +#else + MOVZX(64, 16, host_dreg, R(host_dreg)); +#endif + break; + case NONE: + default: + break; + } + return; + case DSP_REG_ACM0: + case DSP_REG_ACM1: + { + //we already know this is ACCM0 or ACCM1 +#ifdef _M_IX86 // All32 + gpr.readReg(reg, host_dreg, extend); +#else + OpArg acc_reg; + gpr.getReg(reg-DSP_REG_ACM0+DSP_REG_ACC0_64, acc_reg); +#endif + OpArg sr_reg; + gpr.getReg(DSP_REG_SR,sr_reg); + + DSPJitRegCache c(gpr); + TEST(16, sr_reg, Imm16(SR_40_MODE_BIT)); + FixupBranch not_40bit = J_CC(CC_Z, true); + +#ifdef _M_IX86 // All32 + DSPJitRegCache c2(gpr); + gpr.putReg(DSP_REG_SR, false); + X64Reg tmp1; + gpr.getFreeXReg(tmp1); + gpr.readReg(reg-DSP_REG_ACM0+DSP_REG_ACH0, tmp1, NONE); + MOVSX(32,16,host_dreg,R(host_dreg)); + SHL(32, R(tmp1), Imm8(16)); + MOV(16,R(tmp1),R(host_dreg)); + CMP(32,R(host_dreg), R(tmp1)); + + FixupBranch no_saturate = J_CC(CC_Z); + + CMP(32,R(tmp1),Imm32(0)); + FixupBranch negative = J_CC(CC_LE); + + MOV(32,R(host_dreg),Imm32(0x7fff));//this works for all extend modes + FixupBranch done_positive = J(); + + SetJumpTarget(negative); + if (extend == NONE || extend == ZERO) + MOV(32,R(host_dreg),Imm32(0x00008000)); + else + MOV(32,R(host_dreg),Imm32(0xffff8000)); + FixupBranch done_negative = J(); + + SetJumpTarget(no_saturate); + if (extend == ZERO) + MOVZX(32,16,host_dreg,R(host_dreg)); + SetJumpTarget(done_positive); + SetJumpTarget(done_negative); + gpr.putXReg(tmp1); + gpr.flushRegs(c2); + SetJumpTarget(not_40bit); + gpr.flushRegs(c); +#else + + MOVSX(64,32,host_dreg,acc_reg); + CMP(64,R(host_dreg),acc_reg); + FixupBranch no_saturate = J_CC(CC_Z); + + CMP(64,acc_reg,Imm32(0)); + FixupBranch negative = J_CC(CC_LE); + + MOV(64,R(host_dreg),Imm32(0x7fff));//this works for all extend modes + FixupBranch done_positive = J(); + + SetJumpTarget(negative); + if (extend == NONE || extend == ZERO) + MOV(64,R(host_dreg),Imm32(0x00008000)); + else + MOV(64,R(host_dreg),Imm32(0xffff8000)); + FixupBranch done_negative = J(); + + SetJumpTarget(no_saturate); + SetJumpTarget(not_40bit); + + MOV(64, R(host_dreg), acc_reg); + if (extend == NONE || extend == ZERO) + SHR(64, R(host_dreg), Imm8(16)); + else + SAR(64, R(host_dreg), Imm8(16)); + SetJumpTarget(done_positive); + SetJumpTarget(done_negative); + gpr.flushRegs(c); + gpr.putReg(reg-DSP_REG_ACM0+DSP_REG_ACC0_64, false); +#endif + + gpr.putReg(DSP_REG_SR, false); + } + return; + default: + gpr.readReg(reg, host_dreg, extend); + return; + } +} + // addr math // // These functions detect overflow by checking if @@ -119,13 +465,14 @@ void DSPEmitter::increase_addr_reg(int reg, int _ix_reg) //eax = dar XOR(32, R(EAX), R(ECX)); XOR(32, R(EAX), R(tmp1)); - LEA(32, ECX, MRegSum(EDX, EDX)); - OR(32, R(ECX), Imm8(2)); - AND(32, R(EAX), R(ECX)); //if (ix >= 0) TEST(32, R(ECX), R(ECX)); FixupBranch negative = J_CC(CC_S); + LEA(32, ECX, MRegSum(EDX, EDX)); + OR(32, R(ECX), Imm8(2)); + AND(32, R(EAX), R(ECX)); + //if (dar > wr) CMP(32, R(EAX), R(EDX)); FixupBranch done = J_CC(CC_BE); @@ -136,6 +483,10 @@ void DSPEmitter::increase_addr_reg(int reg, int _ix_reg) //else SetJumpTarget(negative); + LEA(32, ECX, MRegSum(EDX, EDX)); + OR(32, R(ECX), Imm8(2)); + AND(32, R(EAX), R(ECX)); + //if ((((nar + wr + 1) ^ nar) & dar) <= wr) LEA(32, ECX, MComplex(tmp1, EDX, 1, 1)); XOR(32, R(ECX), R(tmp1)); @@ -184,13 +535,14 @@ void DSPEmitter::decrease_addr_reg(int reg) //eax = dar XOR(32, R(EAX), R(ECX)); XOR(32, R(EAX), R(tmp1)); - LEA(32, ECX, MRegSum(EDX, EDX)); - OR(32, R(ECX), Imm8(2)); - AND(32, R(EAX), R(ECX)); //if ((u32)ix > 0xFFFF8000) ==> (~ix < 0x00007FFF) CMP(32, R(ECX), Imm32(0x00007FFF)); FixupBranch positive = J_CC(CC_AE); + LEA(32, ECX, MRegSum(EDX, EDX)); + OR(32, R(ECX), Imm8(2)); + AND(32, R(EAX), R(ECX)); + //if (dar > wr) CMP(32, R(EAX), R(EDX)); FixupBranch done = J_CC(CC_BE); @@ -201,6 +553,10 @@ void DSPEmitter::decrease_addr_reg(int reg) //else SetJumpTarget(positive); + LEA(32, ECX, MRegSum(EDX, EDX)); + OR(32, R(ECX), Imm8(2)); + AND(32, R(EAX), R(ECX)); + //if ((((nar + wr + 1) ^ nar) & dar) <= wr) LEA(32, ECX, MComplex(tmp1, EDX, 1, 1)); XOR(32, R(ECX), R(tmp1)); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 97185561fa..54fa742148 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -1997,7 +1997,7 @@ void Jit64::slwx(UGeckoInstruction inst) } else { - MOV(32, gpr.R(a), gpr.R(a)); + MOVZX(64, 32, gpr.R(a).GetSimpleReg(), gpr.R(a)); } gpr.UnlockAll(); gpr.UnlockAllX();