diff --git a/Source/Core/Core/DSP/DSPAnalyzer.cpp b/Source/Core/Core/DSP/DSPAnalyzer.cpp index 19d563f4b2..a62de02771 100644 --- a/Source/Core/Core/DSP/DSPAnalyzer.cpp +++ b/Source/Core/Core/DSP/DSPAnalyzer.cpp @@ -91,7 +91,9 @@ void Analyzer::FindInstructionStarts(const SDSP& dsp, u16 start_addr, u16 end_ad { // This may not be 100% accurate in case of jump tables! // It could get desynced, which would be bad. We'll see if that's an issue. +#ifndef DISABLE_UPDATE_SR_ANALYSIS u16 last_arithmetic = 0; +#endif for (u16 addr = start_addr; addr < end_addr;) { const UDSPInstruction inst = dsp.ReadIMEM(addr); @@ -117,6 +119,7 @@ void Analyzer::FindInstructionStarts(const SDSP& dsp, u16 start_addr, u16 end_ad m_code_flags[static_cast(addr + 1u)] |= CODE_LOOP_END; } +#ifndef DISABLE_UPDATE_SR_ANALYSIS // Mark the last arithmetic/multiplier instruction before a branch. // We must update the SR reg at these instructions if (opcode->updates_sr) @@ -128,6 +131,7 @@ void Analyzer::FindInstructionStarts(const SDSP& dsp, u16 start_addr, u16 end_ad { m_code_flags[last_arithmetic] |= CODE_UPDATE_SR; } +#endif // If an instruction potentially raises exceptions, mark the following // instruction as needing to check for exceptions diff --git a/Source/Core/Core/DSP/DSPAnalyzer.h b/Source/Core/Core/DSP/DSPAnalyzer.h index c5875e3701..5f172f48be 100644 --- a/Source/Core/Core/DSP/DSPAnalyzer.h +++ b/Source/Core/Core/DSP/DSPAnalyzer.h @@ -6,6 +6,13 @@ #include #include "Common/CommonTypes.h" +// The update SR analysis is not perfect: it does not properly handle modified SR values if SR is +// only read within a function call, and it's possible that a previous instruction sets SR (e.g. the +// logical zero bit, or the sticky overflow bit) but is marked as not changing SR because a later +// instruction sets it. When this flag is set, we always treat instructions as updating SR, and +// disable the analysis of whether SR needs to be set. 
+#define DISABLE_UPDATE_SR_ANALYSIS + namespace DSP { struct SDSP; @@ -63,7 +70,11 @@ public: // Whether or not the address describes an instruction that requires updating the SR register. [[nodiscard]] bool IsUpdateSR(u16 address) const { +#ifdef DISABLE_UPDATE_SR_ANALYSIS + return true; +#else return (GetCodeFlags(address) & CODE_UPDATE_SR) != 0; +#endif } // Whether or not the address describes instructions that potentially raise exceptions. diff --git a/Source/Core/Core/DSP/DSPCore.h b/Source/Core/Core/DSP/DSPCore.h index 506e3995d4..6455349a09 100644 --- a/Source/Core/Core/DSP/DSPCore.h +++ b/Source/Core/Core/DSP/DSPCore.h @@ -271,7 +271,7 @@ struct DSP_Regs { u16 l; u16 m; - u16 h; + u32 h; // 32 bits so that val is fully sign-extended (only 8 bits are actually used) }; } ac[2]; }; diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp index 41ff79cc67..c4319db554 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp @@ -119,8 +119,7 @@ void Interpreter::cmp(const UDSPInstruction) const s64 acc1 = GetLongAcc(1); const s64 res = dsp_convert_long_acc(acc0 - acc1); - UpdateSR64(res, isCarry2(acc0, res), - isOverflow(acc0, -acc1, res)); // CF -> influence on ABS/0xa100 + UpdateSR64Sub(acc0, acc1, res); ZeroWriteBackLog(); } @@ -134,12 +133,12 @@ void Interpreter::cmpar(const UDSPInstruction opc) const u8 rreg = (opc >> 12) & 0x1; const u8 sreg = (opc >> 11) & 0x1; - const s64 sr = GetLongAcc(sreg); - s64 rr = GetAXHigh(rreg); - rr <<= 16; - const s64 res = dsp_convert_long_acc(sr - rr); + const s64 acc = GetLongAcc(sreg); + s64 ax = GetAXHigh(rreg); + ax <<= 16; + const s64 res = dsp_convert_long_acc(acc - ax); - UpdateSR64(res, isCarry2(sr, res), isOverflow(sr, -rr, res)); + UpdateSR64Sub(acc, ax, res); ZeroWriteBackLog(); } @@ -157,10 +156,11 @@ void Interpreter::cmpi(const UDSPInstruction opc) const s64 val = 
GetLongAcc(reg); // Immediate is considered to be at M level in the 40-bit accumulator. - const s64 imm = (s64)(s16)state.FetchInstruction() << 16; + s64 imm = static_cast(state.FetchInstruction()); + imm <<= 16; const s64 res = dsp_convert_long_acc(val - imm); - UpdateSR64(res, isCarry2(val, res), isOverflow(val, -imm, res)); + UpdateSR64Sub(val, imm, res); } // CMPIS $acD, #I @@ -175,11 +175,11 @@ void Interpreter::cmpis(const UDSPInstruction opc) const u8 areg = (opc >> 8) & 0x1; const s64 acc = GetLongAcc(areg); - s64 val = (s8)opc; - val <<= 16; - const s64 res = dsp_convert_long_acc(acc - val); + s64 imm = static_cast(opc); + imm <<= 16; + const s64 res = dsp_convert_long_acc(acc - imm); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -val, res)); + UpdateSR64Sub(acc, imm, res); } //---- @@ -401,13 +401,12 @@ void Interpreter::addr(const UDSPInstruction opc) } ax <<= 16; - s64 res = acc + ax; + const s64 res = acc + ax; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, ax, res)); + UpdateSR64Add(acc, ax, GetLongAcc(dreg)); } // ADDAX $acD, $axS @@ -422,13 +421,12 @@ void Interpreter::addax(const UDSPInstruction opc) const s64 acc = GetLongAcc(dreg); const s64 ax = GetLongACX(sreg); - s64 res = acc + ax; + const s64 res = acc + ax; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, ax, res)); + UpdateSR64Add(acc, ax, GetLongAcc(dreg)); } // ADD $acD, $ac(1-D) @@ -442,13 +440,12 @@ void Interpreter::add(const UDSPInstruction opc) const s64 acc0 = GetLongAcc(dreg); const s64 acc1 = GetLongAcc(1 - dreg); - s64 res = acc0 + acc1; + const s64 res = acc0 + acc1; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc0, res), isOverflow(acc0, acc1, res)); + UpdateSR64Add(acc0, acc1, GetLongAcc(dreg)); } // ADDP $acD @@ -462,13 +459,12 @@ void Interpreter::addp(const 
UDSPInstruction opc) const s64 acc = GetLongAcc(dreg); const s64 prod = GetLongProduct(); - s64 res = acc + prod; + const s64 res = acc + prod; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, prod, res)); + UpdateSR64Add(acc, prod, GetLongAcc(dreg)); } // ADDAXL $acD, $axS.l @@ -484,15 +480,12 @@ void Interpreter::addaxl(const UDSPInstruction opc) const u64 acc = GetLongAcc(dreg); const u16 acx = static_cast(GetAXLow(sreg)); - - u64 res = acc + acx; + const u64 res = acc + acx; ZeroWriteBackLog(); SetLongAcc(dreg, static_cast(res)); - res = GetLongAcc(dreg); - UpdateSR64(static_cast(res), isCarry(acc, res), - isOverflow(static_cast(acc), static_cast(acx), static_cast(res))); + UpdateSR64Add(acc, acx, GetLongAcc(dreg)); } // ADDI $amR, #I @@ -509,11 +502,10 @@ void Interpreter::addi(const UDSPInstruction opc) const s64 acc = GetLongAcc(areg); s64 imm = static_cast(state.FetchInstruction()); imm <<= 16; - s64 res = acc + imm; + const s64 res = acc + imm; SetLongAcc(areg, res); - res = GetLongAcc(areg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, imm, res)); + UpdateSR64Add(acc, imm, GetLongAcc(areg)); } // ADDIS $acD, #I @@ -526,13 +518,12 @@ void Interpreter::addis(const UDSPInstruction opc) const u8 dreg = (opc >> 8) & 0x1; const s64 acc = GetLongAcc(dreg); - s64 imm = static_cast(static_cast(opc)); + s64 imm = static_cast(opc); imm <<= 16; - s64 res = acc + imm; + const s64 res = acc + imm; SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, imm, res)); + UpdateSR64Add(acc, imm, GetLongAcc(dreg)); } // INCM $acsD @@ -546,13 +537,12 @@ void Interpreter::incm(const UDSPInstruction opc) const s64 sub = 0x10000; const s64 acc = GetLongAcc(dreg); - s64 res = acc + sub; + const s64 res = acc + sub; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, sub, res)); + 
UpdateSR64Add(acc, sub, GetLongAcc(dreg)); } // INC $acD @@ -565,13 +555,12 @@ void Interpreter::inc(const UDSPInstruction opc) const u8 dreg = (opc >> 8) & 0x1; const s64 acc = GetLongAcc(dreg); - s64 res = acc + 1; + const s64 res = acc + 1; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, 1, res)); + UpdateSR64Add(acc, 1, GetLongAcc(dreg)); } //---- @@ -606,13 +595,12 @@ void Interpreter::subr(const UDSPInstruction opc) } ax <<= 16; - s64 res = acc - ax; + const s64 res = acc - ax; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -ax, res)); + UpdateSR64Sub(acc, ax, GetLongAcc(dreg)); } // SUBAX $acD, $axS @@ -627,13 +615,12 @@ void Interpreter::subax(const UDSPInstruction opc) const s64 acc = GetLongAcc(dreg); const s64 acx = GetLongACX(sreg); - s64 res = acc - acx; + const s64 res = acc - acx; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -acx, res)); + UpdateSR64Sub(acc, acx, GetLongAcc(dreg)); } // SUB $acD, $ac(1-D) @@ -647,13 +634,12 @@ void Interpreter::sub(const UDSPInstruction opc) const s64 acc1 = GetLongAcc(dreg); const s64 acc2 = GetLongAcc(1 - dreg); - s64 res = acc1 - acc2; + const s64 res = acc1 - acc2; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc1, res), isOverflow(acc1, -acc2, res)); + UpdateSR64Sub(acc1, acc2, GetLongAcc(dreg)); } // SUBP $acD @@ -667,13 +653,12 @@ void Interpreter::subp(const UDSPInstruction opc) const s64 acc = GetLongAcc(dreg); const s64 prod = GetLongProduct(); - s64 res = acc - prod; + const s64 res = acc - prod; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -prod, res)); + UpdateSR64Sub(acc, prod, GetLongAcc(dreg)); } // DECM $acsD @@ -687,13 +672,12 @@ void 
Interpreter::decm(const UDSPInstruction opc) const s64 sub = 0x10000; const s64 acc = GetLongAcc(dreg); - s64 res = acc - sub; + const s64 res = acc - sub; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -sub, res)); + UpdateSR64Sub(acc, sub, GetLongAcc(dreg)); } // DEC $acD @@ -706,13 +690,12 @@ void Interpreter::dec(const UDSPInstruction opc) const u8 dreg = (opc >> 8) & 0x01; const s64 acc = GetLongAcc(dreg); - s64 res = acc - 1; + const s64 res = acc - 1; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -1, res)); + UpdateSR64Sub(acc, 1, GetLongAcc(dreg)); } //---- @@ -721,18 +704,23 @@ void Interpreter::dec(const UDSPInstruction opc) // 0111 110d xxxx xxxx // Negate accumulator $acD. // -// flags out: --xx xx00 +// flags out: x-xx xxxx +// +// The carry flag is set only if $acD was zero. +// The overflow flag is set only if $acD was 0x8000000000 (the minimum value), +// as -INT_MIN is INT_MIN in two's complement. In both of these cases, +// the value of $acD after the operation is the same as it was before. void Interpreter::neg(const UDSPInstruction opc) { const u8 dreg = (opc >> 8) & 0x1; - s64 acc = GetLongAcc(dreg); - acc = 0 - acc; + const s64 acc = GetLongAcc(dreg); + const s64 res = 0 - acc; ZeroWriteBackLog(); - SetLongAcc(dreg, acc); - UpdateSR64(GetLongAcc(dreg)); + SetLongAcc(dreg, res); + UpdateSR64Sub(0, acc, GetLongAcc(dreg)); } // ABS $acD @@ -752,7 +740,7 @@ void Interpreter::abs(const UDSPInstruction opc) ZeroWriteBackLog(); SetLongAcc(dreg, acc); - UpdateSR64(GetLongAcc(dreg)); + UpdateSR64(GetLongAcc(dreg)); // TODO: Is this right? 
} //---- @@ -856,7 +844,7 @@ void Interpreter::lsr16(const UDSPInstruction opc) u64 acc = GetLongAcc(areg); // Lop off the extraneous sign extension our 64-bit fake accum causes - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; acc >>= 16; ZeroWriteBackLog(); @@ -912,7 +900,7 @@ void Interpreter::lsr(const UDSPInstruction opc) u16 shift; u64 acc = GetLongAcc(rreg); // Lop off the extraneous sign extension our 64-bit fake accum causes - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; if ((opc & 0x3f) == 0) shift = 0; @@ -977,7 +965,7 @@ void Interpreter::lsrn(const UDSPInstruction opc) s16 shift; const u16 accm = static_cast(GetAccMid(1)); u64 acc = GetLongAcc(0); - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; if ((accm & 0x3f) == 0) shift = 0; @@ -1046,7 +1034,7 @@ void Interpreter::lsrnrx(const UDSPInstruction opc) s16 shift; const u16 axh = state.r.ax[sreg].h; u64 acc = GetLongAcc(dreg); - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; if ((axh & 0x3f) == 0) shift = 0; @@ -1121,7 +1109,7 @@ void Interpreter::lsrnr(const UDSPInstruction opc) s16 shift; const u16 accm = static_cast(GetAccMid(1 - dreg)); u64 acc = GetLongAcc(dreg); - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; if ((accm & 0x3f) == 0) shift = 0; diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntCCUtil.h b/Source/Core/Core/DSP/Interpreter/DSPIntCCUtil.h index 0db9dc969d..3e970cc2ac 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntCCUtil.h +++ b/Source/Core/Core/DSP/Interpreter/DSPIntCCUtil.h @@ -11,18 +11,19 @@ namespace DSP::Interpreter { -constexpr bool isCarry(u64 val, u64 result) +constexpr bool isCarryAdd(u64 val, u64 result) { return val > result; } -constexpr bool isCarry2(u64 val, u64 result) +constexpr bool isCarrySubtract(u64 val, u64 result) { return val >= result; } constexpr bool isOverflow(s64 val1, s64 val2, s64 res) { + // val1 > 0 and val2 > 0 yet res < 0, or val1 < 0 and val2 < 0 yet res 
> 0. return ((val1 ^ res) & (val2 ^ res)) < 0; } diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntMultiplier.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntMultiplier.cpp index a334f99d3e..0816e8deed 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntMultiplier.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntMultiplier.cpp @@ -117,7 +117,7 @@ void Interpreter::addpaxz(const UDSPInstruction opc) SetLongAcc(dreg, res); res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(oldprod, res), false); + UpdateSR64(res, isCarryAdd(oldprod, res), false); // TODO: Why doesn't this set the overflow bit? } //---- diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp index 1fd3aa3ff3..d0cb74f764 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp @@ -11,6 +11,7 @@ #include "Core/DSP/DSPAnalyzer.h" #include "Core/DSP/DSPCore.h" #include "Core/DSP/DSPTables.h" +#include "Core/DSP/Interpreter/DSPIntCCUtil.h" #include "Core/DSP/Interpreter/DSPIntTables.h" namespace DSP::Interpreter @@ -253,8 +254,8 @@ bool Interpreter::CheckCondition(u8 condition) const const auto IsLess = [this] { return IsSRFlagSet(SR_OVERFLOW) != IsSRFlagSet(SR_SIGN); }; const auto IsZero = [this] { return IsSRFlagSet(SR_ARITH_ZERO); }; const auto IsLogicZero = [this] { return IsSRFlagSet(SR_LOGIC_ZERO); }; - const auto IsConditionA = [this] { - return (IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS)) && !IsSRFlagSet(SR_ARITH_ZERO); + const auto IsConditionB = [this] { + return (!(IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS))) || IsSRFlagSet(SR_ARITH_ZERO); }; switch (condition & 0xf) @@ -282,14 +283,14 @@ bool Interpreter::CheckCondition(u8 condition) const case 0x9: // ? - Over s32 return IsOverS32(); case 0xa: // ? - return IsConditionA(); + return !IsConditionB(); case 0xb: // ? 
- return !IsConditionA(); + return IsConditionB(); case 0xc: // LNZ - Logic Not Zero return !IsLogicZero(); case 0xd: // LZ - Logic Zero return IsLogicZero(); - case 0xe: // 0 - Overflow + case 0xe: // O - Overflow return IsOverflow(); default: return true; @@ -547,8 +548,16 @@ void Interpreter::UpdateSR16(s16 value, bool carry, bool overflow, bool over_s32 } } +static constexpr bool IsProperlySignExtended(u64 val) +{ + const u64 topbits = val & 0xffff'ff80'0000'0000ULL; + return (topbits == 0) || (0xffff'ff80'0000'0000ULL == topbits); +} + void Interpreter::UpdateSR64(s64 value, bool carry, bool overflow) { + DEBUG_ASSERT(IsProperlySignExtended(value)); + auto& state = m_dsp_core.DSPState(); state.r.sr &= ~SR_CMP_MASK; @@ -579,7 +588,7 @@ void Interpreter::UpdateSR64(s64 value, bool carry, bool overflow) } // 0x10 - if (value != static_cast(value)) + if (isOverS32(value)) { state.r.sr |= SR_OVER_S32; } @@ -591,6 +600,28 @@ void Interpreter::UpdateSR64(s64 value, bool carry, bool overflow) } } +// Updates SR based on a 64-bit value computed by result = val1 + val2. +// Result is a separate parameter that is properly sign-extended, and as such may not equal the +// result of adding val1 and val2 in a 64-bit context. +void Interpreter::UpdateSR64Add(s64 val1, s64 val2, s64 result) +{ + DEBUG_ASSERT(((val1 + val2) & 0xff'ffff'ffffULL) == (result & 0xff'ffff'ffffULL)); + DEBUG_ASSERT(IsProperlySignExtended(val1)); + DEBUG_ASSERT(IsProperlySignExtended(val2)); + UpdateSR64(result, isCarryAdd(val1, result), isOverflow(val1, val2, result)); +} + +// Updates SR based on a 64-bit value computed by result = val1 - val2. +// Result is a separate parameter that is properly sign-extended, and as such may not equal the +// result of subtracting val2 from val1 in a 64-bit context. 
+void Interpreter::UpdateSR64Sub(s64 val1, s64 val2, s64 result) +{ + DEBUG_ASSERT(((val1 - val2) & 0xff'ffff'ffffULL) == (result & 0xff'ffff'ffffULL)); + DEBUG_ASSERT(IsProperlySignExtended(val1)); + DEBUG_ASSERT(IsProperlySignExtended(val2)); + UpdateSR64(result, isCarrySubtract(val1, result), isOverflow(val1, -val2, result)); +} + void Interpreter::UpdateSRLogicZero(bool value) { auto& state = m_dsp_core.DSPState(); @@ -769,7 +800,7 @@ void Interpreter::ConditionalExtendAccum(int reg) // Sign extend into whole accum. auto& state = m_dsp_core.DSPState(); const u16 val = state.r.ac[reg - DSP_REG_ACM0].m; - state.r.ac[reg - DSP_REG_ACM0].h = (val & 0x8000) != 0 ? 0xFFFF : 0x0000; + state.r.ac[reg - DSP_REG_ACM0].h = (val & 0x8000) != 0 ? 0xFFFFFFFF : 0x0000; state.r.ac[reg - DSP_REG_ACM0].l = 0; } diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h index 119c509f2a..422c9a5f45 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h @@ -225,6 +225,8 @@ private: void UpdateSR16(s16 value, bool carry = false, bool overflow = false, bool over_s32 = false); void UpdateSR64(s64 value, bool carry = false, bool overflow = false); + void UpdateSR64Add(s64 val1, s64 val2, s64 result); + void UpdateSR64Sub(s64 val1, s64 val2, s64 result); void UpdateSRLogicZero(bool value); u16 OpReadRegister(int reg_); diff --git a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h index ee74def463..05d52abe51 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h +++ b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h @@ -228,6 +228,7 @@ private: void get_long_prod(Gen::X64Reg long_prod = Gen::RAX); void get_long_prod_round_prodl(Gen::X64Reg long_prod = Gen::RAX); void set_long_prod(); + void dsp_convert_long_acc(Gen::X64Reg long_acc); // s64 -> s40 void round_long_acc(Gen::X64Reg long_acc = Gen::EAX); void set_long_acc(int _reg, 
Gen::X64Reg acc = Gen::EAX); void get_acc_h(int _reg, Gen::X64Reg acc = Gen::EAX, bool sign = true); @@ -246,7 +247,16 @@ private: // CC helpers void Update_SR_Register64(Gen::X64Reg val = Gen::EAX, Gen::X64Reg scratch = Gen::EDX); - void Update_SR_Register64_Carry(Gen::X64Reg val, Gen::X64Reg carry_ovfl, bool carry_eq = false); + void UpdateSR64AddSub(Gen::X64Reg val1, Gen::X64Reg val2, Gen::X64Reg result, Gen::X64Reg scratch, + bool subtract); + void UpdateSR64Add(Gen::X64Reg val1, Gen::X64Reg val2, Gen::X64Reg result, Gen::X64Reg scratch) + { + UpdateSR64AddSub(val1, val2, result, scratch, false); + } + void UpdateSR64Sub(Gen::X64Reg val1, Gen::X64Reg val2, Gen::X64Reg result, Gen::X64Reg scratch) + { + UpdateSR64AddSub(val1, val2, result, scratch, true); + } void Update_SR_Register16(Gen::X64Reg val = Gen::EAX); void Update_SR_Register16_OverS32(Gen::X64Reg val = Gen::EAX); diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp index 633b714124..1a9270cab1 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp @@ -64,18 +64,19 @@ void DSPEmitter::andcf(const UDSPInstruction opc) if (FlagsNeeded()) { const u8 reg = (opc >> 8) & 0x1; - // u16 imm = dsp_fetch_code(); + // const u16 imm = m_dsp_core.DSPState().FetchInstruction(); const u16 imm = m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1); - // u16 val = dsp_get_acc_m(reg); - get_acc_m(reg); - // Update_SR_LZ(((val & imm) == imm) ? 
true : false); - // if ((val & imm) == imm) - // g_dsp.r.sr |= SR_LOGIC_ZERO; - // else - // g_dsp.r.sr &= ~SR_LOGIC_ZERO; + // const u16 val = GetAccMid(reg); + X64Reg val = RAX; + get_acc_m(reg, val); + // UpdateSRLogicZero((val & imm) == imm); + // if ((val & imm) == imm) + // g_dsp.r.sr |= SR_LOGIC_ZERO; + // else + // g_dsp.r.sr &= ~SR_LOGIC_ZERO; const OpArg sr_reg = m_gpr.GetReg(DSP_REG_SR); - AND(16, R(RAX), Imm16(imm)); - CMP(16, R(RAX), Imm16(imm)); + AND(16, R(val), Imm16(imm)); + CMP(16, R(val), Imm16(imm)); FixupBranch notLogicZero = J_CC(CC_NE); OR(16, sr_reg, Imm16(SR_LOGIC_ZERO)); FixupBranch exit = J(); @@ -99,17 +100,18 @@ void DSPEmitter::andf(const UDSPInstruction opc) if (FlagsNeeded()) { const u8 reg = (opc >> 8) & 0x1; - // u16 imm = dsp_fetch_code(); + // const u16 imm = m_dsp_core.DSPState().FetchInstruction(); const u16 imm = m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1); - // u16 val = dsp_get_acc_m(reg); - get_acc_m(reg); - // Update_SR_LZ(((val & imm) == 0) ? 
true : false); - // if ((val & imm) == 0) - // g_dsp.r.sr |= SR_LOGIC_ZERO; - // else - // g_dsp.r.sr &= ~SR_LOGIC_ZERO; + // const u16 val = GetAccMid(reg); + X64Reg val = RAX; + get_acc_m(reg, val); + // UpdateSRLogicZero((val & imm) == 0); + // if ((val & imm) == 0) + // g_dsp.r.sr |= SR_LOGIC_ZERO; + // else + // g_dsp.r.sr &= ~SR_LOGIC_ZERO; const OpArg sr_reg = m_gpr.GetReg(DSP_REG_SR); - TEST(16, R(RAX), Imm16(imm)); + TEST(16, R(val), Imm16(imm)); FixupBranch notLogicZero = J_CC(CC_NE); OR(16, sr_reg, Imm16(SR_LOGIC_ZERO)); FixupBranch exit = J(); @@ -167,18 +169,21 @@ void DSPEmitter::cmp(const UDSPInstruction opc) { if (FlagsNeeded()) { + // const s64 acc0 = GetLongAcc(0); + X64Reg acc0 = RAX; + get_long_acc(0, acc0); + // const s64 acc1 = GetLongAcc(1); + X64Reg acc1 = RDX; + get_long_acc(1, acc1); + // s64 res = dsp_convert_long_acc(acc0 - acc1); + X64Reg res = RCX; + MOV(64, R(res), R(acc0)); + SUB(64, R(res), R(acc1)); + dsp_convert_long_acc(RCX); + + // UpdateSR64Sub(acc0, acc1, res); X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc0 = dsp_get_long_acc(0); - get_long_acc(0, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 acc1 = dsp_get_long_acc(1); - get_long_acc(1, RDX); - // s64 res = dsp_convert_long_acc(acc0 - acc1); - SUB(64, R(RAX), R(RDX)); - // Update_SR_Register64(res, isCarry2(acc0, res), isOverflow(acc0, -acc1, res)); // CF -> - // influence on ABS/0xa100 - NEG(64, R(RDX)); - Update_SR_Register64_Carry(EAX, tmp1, true); + UpdateSR64Sub(acc0, acc1, res, tmp1); m_gpr.PutXReg(tmp1); } } @@ -195,19 +200,22 @@ void DSPEmitter::cmpar(const UDSPInstruction opc) u8 rreg = ((opc >> 12) & 0x1); u8 sreg = (opc >> 11) & 0x1; + // const s64 acc = GetLongAcc(sreg); + X64Reg acc = RAX; + get_long_acc(sreg, acc); + // s64 ax = GetAXHigh(rreg); + X64Reg ax = RDX; + get_ax_h(rreg, ax); + // ax <<= 16; + SHL(64, R(ax), Imm8(16)); + // const s64 res = dsp_convert_long_acc(acc - ax); + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(ax)); + 
dsp_convert_long_acc(res); + // UpdateSR64Sub(acc, ax, res); X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 sr = dsp_get_long_acc(sreg); - get_long_acc(sreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 rr = (s16)g_dsp.r.axh[rreg]; - get_ax_h(rreg, RDX); - // rr <<= 16; - SHL(64, R(RDX), Imm8(16)); - // s64 res = dsp_convert_long_acc(sr - rr); - SUB(64, R(RAX), R(RDX)); - // Update_SR_Register64(res, isCarry2(sr, res), isOverflow(sr, -rr, res)); - NEG(64, R(RDX)); - Update_SR_Register64_Carry(EAX, tmp1, true); + UpdateSR64Sub(acc, ax, res, tmp1); m_gpr.PutXReg(tmp1); } } @@ -224,19 +232,24 @@ void DSPEmitter::cmpi(const UDSPInstruction opc) if (FlagsNeeded()) { const u8 reg = (opc >> 8) & 0x1; - const X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 val = dsp_get_long_acc(reg); - get_long_acc(reg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 imm = (s64)(s16)dsp_fetch_code() << 16; // Immediate is considered to be at M level in - // the 40-bit accumulator. - const u16 imm = m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1); - MOV(64, R(RDX), Imm64((s64)(s16)imm << 16)); - // s64 res = dsp_convert_long_acc(val - imm); - SUB(64, R(RAX), R(RDX)); - // Update_SR_Register64(res, isCarry2(val, res), isOverflow(val, -imm, res)); - NEG(64, R(RDX)); - Update_SR_Register64_Carry(EAX, tmp1, true); + // const s64 val = GetLongAcc(reg); + X64Reg val = RAX; + get_long_acc(reg, val); + // Immediate is considered to be at M level in the 40-bit accumulator. 
+ // s64 imm = static_cast(state.FetchInstruction()); + // imm <<= 16; + X64Reg imm_reg = RDX; + s64 imm = static_cast(m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1)); + imm <<= 16; + MOV(64, R(imm_reg), Imm64(imm)); + // const s64 res = dsp_convert_long_acc(val - imm); + X64Reg res = RCX; + MOV(64, R(res), R(val)); + SUB(64, R(res), R(imm_reg)); + dsp_convert_long_acc(res); + // UpdateSR64Sub(val, imm, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(val, imm_reg, res, tmp1); m_gpr.PutXReg(tmp1); } } @@ -253,18 +266,23 @@ void DSPEmitter::cmpis(const UDSPInstruction opc) if (FlagsNeeded()) { u8 areg = (opc >> 8) & 0x1; - // s64 acc = dsp_get_long_acc(areg); + // const s64 acc = GetLongAcc(areg); + X64Reg acc = RAX; + get_long_acc(areg, acc); + // s64 imm = static_cast(opc); + // imm <<= 16; + X64Reg imm_reg = RDX; + s64 imm = static_cast(opc); + imm <<= 16; + MOV(64, R(imm_reg), Imm64(imm)); + // const s64 res = dsp_convert_long_acc(acc - imm); + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(imm_reg)); + dsp_convert_long_acc(res); + // UpdateSR64Sub(acc, imm, res); X64Reg tmp1 = m_gpr.GetFreeXReg(); - get_long_acc(areg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 val = (s8)opc; - // val <<= 16; - MOV(64, R(RDX), Imm64((s64)(s8)opc << 16)); - // s64 res = dsp_convert_long_acc(acc - val); - SUB(64, R(RAX), R(RDX)); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -val, res)); - NEG(64, R(RDX)); - Update_SR_Register64_Carry(EAX, tmp1, true); + UpdateSR64Sub(acc, imm_reg, res, tmp1); m_gpr.PutXReg(tmp1); } } @@ -521,29 +539,27 @@ void DSPEmitter::addr(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; - // s64 acc = dsp_get_long_acc(dreg); - X64Reg tmp1 = m_gpr.GetFreeXReg(); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 ax = (s16)g_dsp.r[sreg]; - dsp_op_read_reg(sreg, RDX, RegisterExtension::Sign); - // ax <<= 16; - SHL(64, R(RDX), Imm8(16)); - // s64 
res = acc + ax; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, ax, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // s64 ax = ...; + X64Reg ax = RDX; + dsp_op_read_reg(sreg, ax, RegisterExtension::Sign); + // ax <<= 16; + SHL(64, R(ax), Imm8(16)); + // const s64 res = acc + ax; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc, ax)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, ax, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, ax, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDAX $acD, $axS @@ -556,28 +572,25 @@ void DSPEmitter::addax(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 ax = dsp_get_long_acx(sreg); - get_long_acx(sreg, RDX); - // s64 res = acc + ax; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, ax, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 ax = GetLongACX(sreg); + X64Reg ax = RDX; + get_long_acx(sreg, ax); + // const s64 res = acc + ax; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc, ax)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, ax, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, ax, res, tmp1); + 
m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADD $acD, $ac(1-D) @@ -589,28 +602,25 @@ void DSPEmitter::add(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc0 = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 acc1 = dsp_get_long_acc(1 - dreg); - get_long_acc(1 - dreg, RDX); - // s64 res = acc0 + acc1; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc0, res), isOverflow(acc0, acc1, res)); + // const s64 acc0 = GetLongAcc(dreg); + X64Reg acc0 = RAX; + get_long_acc(dreg, acc0); + // const s64 acc1 = GetLongAcc(1 - dreg); + X64Reg acc1 = RDX; + get_long_acc(1 - dreg, acc1); + // const s64 res = acc0 + acc1; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc0, acc1)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc0, acc1, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc0, acc1, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDP $acD @@ -622,28 +632,25 @@ void DSPEmitter::addp(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 prod = dsp_get_long_prod(); - get_long_prod(RDX); - // s64 res = acc + prod; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, prod, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 prod = GetLongProduct(); + X64Reg prod = RDX; + get_long_prod(prod); + 
// const s64 res = acc + prod; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc, prod)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, prod, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, prod, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDAXL $acD, $axS.l @@ -657,29 +664,26 @@ void DSPEmitter::addaxl(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // u64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // u16 acx = (u16)dsp_get_ax_l(sreg); - get_ax_l(sreg, RDX); - MOVZX(64, 16, RDX, R(RDX)); - // u64 res = acc + acx; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, (s64)res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64((s64)res, isCarry(acc, res), isOverflow((s64)acc, (s64)acx, (s64)res)); + // const u64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const u16 acx = static_cast(GetAXLow(sreg)); + X64Reg acx = RDX; + get_ax_l(sreg, acx); + MOVZX(64, 16, acx, R(acx)); + // const u64 res = acc + acx; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc, acx)); + // SetLongAcc(dreg, static_cast(res)); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, acx, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, acx, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDI $amR, #I @@ -691,30 +695,30 @@ void DSPEmitter::addaxl(const UDSPInstruction opc) void DSPEmitter::addi(const UDSPInstruction opc) { u8 areg = (opc >> 8) & 0x1; - 
X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(areg); - get_long_acc(areg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 imm = (s16)dsp_fetch_code(); - const s16 imm = m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1); + // const s64 acc = GetLongAcc(areg); + X64Reg acc = RAX; + get_long_acc(areg, acc); + // s64 imm = static_cast(state.FetchInstruction()); // imm <<= 16; - MOV(64, R(RDX), Imm32(imm << 16)); - // s64 res = acc + imm; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(areg, res); - // res = dsp_get_long_acc(areg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, imm, res)); + s64 imm = static_cast(m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1)); + imm <<= 16; + // const s64 res = acc + imm; + X64Reg res = RCX; + // Can safely use LEA as we are using a 16-bit sign-extended immediate shifted left by 16, which + // fits in a signed 32-bit immediate + LEA(64, res, MDisp(acc, static_cast(imm))); + // SetLongAcc(areg, res); + set_long_acc(areg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(areg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, imm, GetLongAcc(areg)); + get_long_acc(areg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(imm)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(areg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDIS $acD, #I @@ -726,30 +730,28 @@ void DSPEmitter::addis(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 imm = (s8)(u8)opc; - // imm <<= 16; - s32 imm = static_cast(opc) << 24 >> 8; - MOV(64, R(RDX), Imm32(imm)); - // s64 res = acc + imm; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, imm, res)); + 
// const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // s64 imm = static_cast(opc); + // imm <<= 16; + s64 imm = static_cast(opc); + imm <<= 16; + // const s64 res = acc + imm; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, static_cast(imm))); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, imm, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(imm)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // INCM $acsD @@ -761,26 +763,24 @@ void DSPEmitter::incm(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; s64 subtract = 0x10000; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - // s64 res = acc + sub; - LEA(64, RAX, MDisp(tmp1, subtract)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, subtract, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 res = acc + sub; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, static_cast(subtract))); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RDX), Imm32((u32)subtract)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, sub, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(subtract)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg); - } - m_gpr.PutXReg(tmp1); } // INC $acD @@ -791,26 +791,24 @@ void DSPEmitter::incm(const UDSPInstruction 
opc) void DSPEmitter::inc(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - // s64 res = acc + 1; - LEA(64, RAX, MDisp(tmp1, 1)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, 1, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 res = acc + 1; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, 1)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RDX), Imm64(1)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, 1, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(1)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg); - } - m_gpr.PutXReg(tmp1); } //---- @@ -825,31 +823,28 @@ void DSPEmitter::subr(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 ax = (s16)g_dsp.r[sreg]; - dsp_op_read_reg(sreg, RDX, RegisterExtension::Sign); - // ax <<= 16; - SHL(64, R(RDX), Imm8(16)); - // s64 res = acc - ax; - SUB(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -ax, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // s64 ax = ...; + X64Reg ax = RDX; + dsp_op_read_reg(sreg, ax, RegisterExtension::Sign); + // ax <<= 16; + SHL(64, R(ax), Imm8(16)); + // const s64 res = acc - ax; + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(ax)); + // 
SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - NEG(64, R(RDX)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, ax, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, ax, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // SUBAX $acD, $axS @@ -862,29 +857,26 @@ void DSPEmitter::subax(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 acx = dsp_get_long_acx(sreg); - get_long_acx(sreg, RDX); - // s64 res = acc - acx; - SUB(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -acx, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 acx = GetLongACX(sreg); + X64Reg acx = RDX; + get_long_acx(sreg, acx); + // const s64 res = acc - acx; + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(acx)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - NEG(64, R(RDX)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, acx, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, acx, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // SUB $acD, $ac(1-D) @@ -895,29 +887,26 @@ void DSPEmitter::subax(const UDSPInstruction opc) void DSPEmitter::sub(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc1 = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); 
- // s64 acc2 = dsp_get_long_acc(1 - dreg); - get_long_acc(1 - dreg, RDX); - // s64 res = acc1 - acc2; - SUB(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc1, res), isOverflow(acc1, -acc2, res)); + // const s64 acc1 = GetLongAcc(dreg); + X64Reg acc1 = RAX; + get_long_acc(dreg, acc1); + // const s64 acc2 = GetLongAcc(1 - dreg); + X64Reg acc2 = RDX; + get_long_acc(1 - dreg, acc2); + // const s64 res = acc1 - acc2; + X64Reg res = RCX; + MOV(64, R(res), R(acc1)); + SUB(64, R(res), R(acc2)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - NEG(64, R(RDX)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc1, acc2, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc1, acc2, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // SUBP $acD @@ -928,29 +917,26 @@ void DSPEmitter::sub(const UDSPInstruction opc) void DSPEmitter::subp(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 prod = dsp_get_long_prod(); - get_long_prod(RDX); - // s64 res = acc - prod; - SUB(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -prod, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 prod = GetLongProduct(); + X64Reg prod = RDX; + get_long_prod(prod); + // const s64 res = acc - prod; + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(prod)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - NEG(64, R(RDX)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, 
RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, prod, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, prod, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // DECM $acsD @@ -962,26 +948,24 @@ void DSPEmitter::decm(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x01; s64 subtract = 0x10000; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - // s64 res = acc - sub; - LEA(64, RAX, MDisp(tmp1, -subtract)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -subtract, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 res = acc - sub; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, -subtract)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RDX), Imm64(-subtract)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, sub, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(subtract)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // DEC $acD @@ -992,26 +976,24 @@ void DSPEmitter::decm(const UDSPInstruction opc) void DSPEmitter::dec(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x01; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - // s64 res = acc - 1; - LEA(64, RAX, MDisp(tmp1, -1)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -1, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + 
get_long_acc(dreg, acc); + // const s64 res = acc - 1; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, -1)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RDX), Imm64(-1)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, 1, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(RDX), Imm64(1)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg); - } - m_gpr.PutXReg(tmp1); } //---- @@ -1020,20 +1002,33 @@ void DSPEmitter::dec(const UDSPInstruction opc) // 0111 110d xxxx xxxx // Negate accumulator $acD. // -// flags out: --xx xx00 +// flags out: x-xx xxxx +// +// The carry flag is set only if $acD was zero. +// The overflow flag is set only if $acD was 0x8000000000 (the minimum value), +// as -INT_MIN is INT_MIN in two's complement. In both of these cases, +// the value of $acD after the operation is the same as it was before. 
void DSPEmitter::neg(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg); - // acc = 0 - acc; - NEG(64, R(RAX)); - // dsp_set_long_acc(dreg, acc); - set_long_acc(dreg); - // Update_SR_Register64(dsp_get_long_acc(dreg)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 res = 0 - acc; + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + NEG(64, R(res)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - Update_SR_Register64(); + // UpdateSR64Sub(0, acc, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + XOR(64, R(imm_reg), R(imm_reg)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(imm_reg, acc, res, tmp1); + m_gpr.PutXReg(tmp1); } } diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp index 043e5f2043..81ee4f6d8d 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp @@ -54,24 +54,43 @@ void DSPEmitter::ReJitConditional(const UDSPInstruction opc, break; case 0xa: // ? case 0xb: // ? + // We want to test this expression, which corresponds to xB: + // (!(IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS))) || IsSRFlagSet(SR_ARITH_ZERO) + // The xB expression is used due to even instructions (i.e. xA) looking for the expression to + // evaluate to false, while odd ones look for it to be true. + + // Since SR_OVER_S32 is bit 4 (0x10) and SR_TOP2BITS is bit 5 (0x20), + // set EDX to 2*EAX, so that SR_OVER_S32 is in bit 5 of EDX. 
LEA(16, EDX, MRegSum(EAX, EAX)); - OR(16, R(EAX), R(EDX)); - SHL(16, R(EDX), Imm8(3)); - NOT(16, R(EAX)); - OR(16, R(EAX), R(EDX)); - TEST(16, R(EAX), Imm16(0x20)); + // Now OR them together, so bit 5 of EDX is + // (IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS)) + OR(16, R(EDX), R(EAX)); + // EDX bit 5 is !(IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS)) + NOT(16, R(EDX)); + // SR_ARITH_ZERO is bit 2 (0x04). We want that in bit 5, so shift left by 3. + SHL(16, R(EAX), Imm8(3)); + // Bit 5 of EAX is IsSRFlagSet(SR_ARITH_ZERO), so or-ing EDX with EAX gives our target expression. + OR(16, R(EDX), R(EAX)); + // Test bit 5 + TEST(16, R(EDX), Imm16(0x20)); break; case 0xc: // LNZ - Logic Not Zero case 0xd: // LZ - Logic Zero TEST(16, R(EAX), Imm16(SR_LOGIC_ZERO)); break; - case 0xe: // 0 - Overflow + case 0xe: // O - Overflow TEST(16, R(EAX), Imm16(SR_OVERFLOW)); break; } DSPJitRegCache c1(m_gpr); - FixupBranch skip_code = - cond == 0xe ? J_CC(CC_E, true) : J_CC((CCFlags)(CC_NE - (cond & 1)), true); + CCFlags flag; + if (cond == 0xe) // Overflow, special case as there is no inverse case + flag = CC_Z; + else if ((cond & 1) == 0) // Even conditions run if the bit is zero, so jump if it IS NOT zero + flag = CC_NZ; + else // Odd conditions run if the bit IS NOT zero, so jump if it IS zero + flag = CC_Z; + FixupBranch skip_code = J_CC(flag, true); (this->*conditional_fn)(opc); m_gpr.FlushRegs(c1); SetJumpTarget(skip_code); diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitCCUtil.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitCCUtil.cpp index 58676e8ef2..a891d0c483 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitCCUtil.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitCCUtil.cpp @@ -65,45 +65,52 @@ void DSPEmitter::Update_SR_Register64(Gen::X64Reg val, Gen::X64Reg scratch) Update_SR_Register(val, scratch); } -// In: (val): s64 _Value -// In: (carry_ovfl): 1 = carry, 2 = overflow -// Clobbers RDX -void DSPEmitter::Update_SR_Register64_Carry(X64Reg val, X64Reg carry_ovfl, bool 
carry_eq) +// Updates SR based on a 64-bit value computed by result = val1 + val2 or result = val1 - val2 +// Clobbers scratch +void DSPEmitter::UpdateSR64AddSub(Gen::X64Reg val1, Gen::X64Reg val2, Gen::X64Reg result, + Gen::X64Reg scratch, bool subtract) { const OpArg sr_reg = m_gpr.GetReg(DSP_REG_SR); - // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; + // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; AND(16, sr_reg, Imm16(~SR_CMP_MASK)); - CMP(64, R(carry_ovfl), R(val)); + CMP(64, R(val1), R(result)); + // x86 ZF set if val1 == result + // x86 CF set if val1 < result + // Note that x86 uses a different definition of carry than the DSP // 0x01 - // g_dsp.r[DSP_REG_SR] |= SR_CARRY; - // Carry = (acc>res) - // Carry2 = (acc>=res) - FixupBranch noCarry = J_CC(carry_eq ? CC_B : CC_BE); + // g_dsp.r[DSP_REG_SR] |= SR_CARRY; + // isCarryAdd = (val1 > result) => skip setting if (val1 <= result) => jump if ZF or CF => use JBE + // isCarrySubtract = (val1 >= result) => skip setting if (val1 < result) => jump if CF => use JB + FixupBranch noCarry = J_CC(subtract ? CC_B : CC_BE); OR(16, sr_reg, Imm16(SR_CARRY)); SetJumpTarget(noCarry); // 0x02 and 0x80 - // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW; - // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW_STICKY; - // Overflow = ((acc ^ res) & (ax ^ res)) < 0 - XOR(64, R(carry_ovfl), R(val)); - XOR(64, R(RDX), R(val)); - TEST(64, R(carry_ovfl), R(RDX)); + // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW; + // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW_STICKY; + // Overflow (add) = ((val1 ^ res) & (val2 ^ res)) < 0 + // Overflow (sub) = ((val1 ^ res) & (-val2 ^ res)) < 0 + MOV(64, R(scratch), R(val1)); + XOR(64, R(scratch), R(result)); + + if (subtract) + NEG(64, R(val2)); + XOR(64, R(result), R(val2)); + + TEST(64, R(scratch), R(result)); // Test scratch & value FixupBranch noOverflow = J_CC(CC_GE); OR(16, sr_reg, Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); SetJumpTarget(noOverflow); + // Restore result and val2 -- TODO: does this really matter? 
+ XOR(64, R(result), R(val2)); + if (subtract) + NEG(64, R(val2)); + m_gpr.PutReg(DSP_REG_SR); - if (carry_eq) - { - Update_SR_Register(); - } - else - { - Update_SR_Register(val); - } + Update_SR_Register(result, scratch); } // In: RAX: s64 _Value diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitMultiplier.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitMultiplier.cpp index efbd59d164..7fba07f974 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitMultiplier.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitMultiplier.cpp @@ -259,13 +259,14 @@ void DSPEmitter::addpaxz(const UDSPInstruction opc) // s64 oldprod = dsp_get_long_prod(); // dsp_set_long_acc(dreg, res); // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(oldprod, res), false); + // Update_SR_Register64(res, isCarryAdd(oldprod, res), false); if (FlagsNeeded()) { get_long_prod(RDX); MOV(64, R(RCX), R(RAX)); set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // TODO: Why does this not set the overflow bit? (And thus, why can't it use UpdateSR64Add?) 
+ Update_SR_Register64(EAX, tmp1); } else { diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp index 1d3b2768de..2354915b31 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp @@ -704,23 +704,6 @@ OpArg DSPJitRegCache::GetReg(int reg, bool load) const OpArg oparg = m_regs[real_reg].loc; m_regs[real_reg].used = true; - // do some register specific fixup - switch (reg) - { - case DSP_REG_ACC0_64: - case DSP_REG_ACC1_64: - if (load) - { - // need to do this because interpreter only does 48 bits - // (and PutReg does the same) - m_emitter.SHL(64, oparg, Imm8(64 - 40)); // sign extend - m_emitter.SAR(64, oparg, Imm8(64 - 40)); - } - break; - default: - break; - } - return oparg; } @@ -738,15 +721,13 @@ void DSPJitRegCache::PutReg(int reg, bool dirty) case DSP_REG_ACH1: if (dirty) { - // no need to extend to full 64bit here until interpreter - // uses that if (oparg.IsSimpleReg()) { // register is already shifted correctly // (if at all) // sign extend from the bottom 8 bits. - m_emitter.MOVSX(16, 8, oparg.GetSimpleReg(), oparg); + m_emitter.MOVSX(32, 8, oparg.GetSimpleReg(), oparg); } else if (oparg.IsImm()) { @@ -759,8 +740,8 @@ void DSPJitRegCache::PutReg(int reg, bool dirty) // of real_reg, since it has the right loc X64Reg tmp = GetFreeXReg(); // Sign extend from the bottom 8 bits. 
- m_emitter.MOVSX(16, 8, tmp, m_regs[reg].loc); - m_emitter.MOV(16, m_regs[reg].loc, R(tmp)); + m_emitter.MOVSX(32, 8, tmp, m_regs[reg].loc); + m_emitter.MOV(32, m_regs[reg].loc, R(tmp)); PutXReg(tmp); } } diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp index 4957947b40..bbf97b98c7 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp @@ -690,7 +690,15 @@ void DSPEmitter::set_long_prod() m_gpr.PutReg(DSP_REG_PROD_64, true); } -// Returns s64 in RAX +// s64 -> s40 in long_acc +void DSPEmitter::dsp_convert_long_acc(Gen::X64Reg long_acc) +{ + // return ((long_acc << (64 - 40)) >> (64 - 40)) + SHL(64, R(long_acc), Imm8(64 - 40)); // sign extend + SAR(64, R(long_acc), Imm8(64 - 40)); +} + +// Returns s64 in long_acc void DSPEmitter::round_long_acc(X64Reg long_acc) { // if (prod & 0x10000) prod = (prod + 0x8000) & ~0xffff; diff --git a/Source/DSPSpy/tests/cond_test.ds b/Source/DSPSpy/tests/cond_test.ds new file mode 100644 index 0000000000..151f3e3a9a --- /dev/null +++ b/Source/DSPSpy/tests/cond_test.ds @@ -0,0 +1,248 @@ +incdir "tests" +include "dsp_base.inc" + +test_main: + CLR $acc0 + CLR $acc1 + CALL test_cond + ; 1. ar0: 9969. ac0.h: 0000. sr: 2224 + + LRI $ac0.h, #0x0050 + CALL test_cond + ; 2. ar0: 9969. ac0.h: 0050. sr: 2224. LRI doesn't change sr. + + TST $acc0 + CALL test_cond + ; 3. ar0: 9655. ac0.h: 0050. sr: 2230 + + LRI $ac1.h, #0x0050 + ADD $acc0, $acc1 ; Causes acc0 to overflow, and thus also become negative + CALL test_cond + ; 4. ar0: d655. ac0.h: ffa0. sr: 22ba + + ADD $acc0, $acc1 ; acc0 is now negative, but not overflowed + CALL test_cond + ; 5. ar0: 965a. ac0.h: fff0. sr: 22b8 + + ADD $acc0, $acc1 ; Triggers carry + CALL test_cond + ; 6. ar0: 9695. ac0.h: 0040. sr: 22b1 + + CLR $acc1 + ADD $acc0, $acc1 ; Adding 0 should do nothing + CALL test_cond + ; 7. ar0: 9655. ac0.h: 0040. 
sr: 22b0 + + SUB $acc0, $acc1 ; Subtracting 0 sets the carry flag + CALL test_cond + ; 8. ar0: 9695. ac0.h: 0040. sr: 22b1 + + LRI $ac1.h, #0x0050 + SUB $acc0, $acc1 ; No carry + CALL test_cond + ; 9. ar0: 965a. ac0.h: fff0. sr: 22b8 + + SUB $acc0, $acc1 ; Carry + CALL test_cond + ; 10. ar0: 969a. ac0.h: ffa0. sr: 22b9 + + SUB $acc0, $acc1 ; Carry and overflow + CALL test_cond + ; 11. ar0: d69a. ac0.h: 0050. sr: 22b3 + + SUB $acc0, $acc1 ; Carry + CALL test_cond + ; 12. ar0: 99a9. ac0.h: 0000. sr: 22a5 + + LRI $ac1.h, #0xffb0 ; -0x50 + SUB $acc0, $acc1 ; No carry or overflow + CALL test_cond + ; 13. ar0: 9655. ac0.h: 0050. sr: 22b0 + + SUB $acc0, $acc1 ; Overflow, no carry + CALL test_cond + ; 14. ar0: d655. ac0.h: ffa0. sr: 22ba + + SUB $acc0, $acc1 ; No carry or overflow + CALL test_cond + ; 15. ar0: 965a. ac0.h: fff0. sr: 22b8 + + SUB $acc0, $acc1 ; Carry + CALL test_cond + ; 16. ar0: 9695. ac0.h: 0040. sr: 22b1 + + LRI $ac1.h, #0xff80 + SUB $acc0, $acc1 ; Overflow, no carry + CALL test_cond + ; 17. ar0: d655. ac0.h: ffc0. sr: 22ba + + ADD $acc0, $acc1 ; Overflow and carry + CALL test_cond + ; 18. ar0: d69a. ac0.h: 0040. sr: 22b3 + + LRI $ac1.h, #0xffb0 + ADD $acc0, $acc1 ; No overflow or carry + CALL test_cond + ; 19. ar0: 965a. ac0.h: fff0. sr: 22b8 + + ADD $acc0, $acc1 ; Carry + CALL test_cond + ; 20. ar0: 969a. ac0.h: ffa0. sr: 22b9 + + ADD $acc0, $acc1 ; Overflow and carry + CALL test_cond + ; 21. ar0: d69a. ac0.h: 0050. sr: 22b3 + + ADD $acc0, $acc1 ; Carry + CALL test_cond + ; 22. ar0: 99a9. ac0.h: 0000. sr: 22a5 + + CLR $acc1 + CMP ; Compare 0 with 0. Results in 0 and carry. + CALL test_cond + ; 23. ar0: 99a9. sr: 22a5 + + ; Logic zero tests + LRIS $ac0.m, #0x01 + ANDF $ac0.m, #0x0000 + CALL test_cond + ; 24. ar0: a9a9. sr: 22e5 + + ANDCF $ac0.m, #0x0000 + CALL test_cond + ; 25. ar0: a9a9. sr: 22e5 + + ANDF $ac0.m, #0x0001 + CALL test_cond + ; 26. ar0: 99a9. sr: 22a5 + + ANDCF $ac0.m, #0x0001 + CALL test_cond + ; 27. ar0: a9a9. 
sr: 22e5 + + ANDF $ac0.m, #0x0002 + CALL test_cond + ; 28. ar0: a9a9. sr: 22e5 + + ANDCF $ac0.m, #0x0002 + CALL test_cond + ; 29. ar0: 99a9. sr: 22a5 + + ANDF $ac0.m, #0x0003 + CALL test_cond + ; 30. ar0: 99a9. sr: 22a5 + + ANDCF $ac0.m, #0x0003 + CALL test_cond + ; 31. ar0: 99a9. sr: 22a5 + + CLR $acc0 + NEG $acc0 ; 0 - 0, marked as carry + CALL test_cond + ; 32. ar0: 99a9. ac0.h: 0000. sr: 22a5 + + LRI $ac0.h, #0x0010 + NEG $acc0 + CALL test_cond + ; 33. ar0: 965a. ac0.h: fff0. sr: 22b8 + + NEG $acc0 + CALL test_cond + ; 34. ar0: 9655. ac0.h: 0010. sr: 22b0 + + LRI $ac0.h, #0xff80 + NEG $acc0 ; -INT_MIN is INT_MIN. This generates an overflow. + CALL test_cond + ; 35. ar0: d655. ac0.h: ff80. sr: 22ba + + CMP ; Compare INT_MIN with 0. Carry but no overflow. + CALL test_cond + ; 36. ar0: 969a. ac0.h: ff80. sr: 22b9 + + MOV $acc1, $acc0 + CALL test_cond + ; 37. ar0: 965a. ac0.h: ff80. sr: 22b8 + + TST $acc1 + CALL test_cond + ; 38. ar0: 965a. ac0.h: ff80. sr: 22b8 + + CLR $acc0 + CMP ; Compare 0 with INT_MIN. Overflow but no carry. + CALL test_cond + ; 39. ar0: d655. ac0.h: 0000. sr: 22ba + +; We're done, DO NOT DELETE THIS LINE + JMP end_of_test + +; Test all conditionals, setting bits in $AR0 based on it. +; $AR0 is used because ADDARN does not update flags. 
+test_cond: + LRI $AR0, #0x0000 + + LRI $IX0, #0x0001 + IFGE + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0002 + IFL + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0004 + IFG + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0008 + IFLE + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0010 + IFNZ + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0020 + IFZ + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0040 + IFNC + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0080 + IFC + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0100 + CW 0x0278 ; IFx8 + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0200 + CW 0x0279 ; IFx9 + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0400 + CW 0x027A ; IFxA + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0800 + CW 0x027B ; IFxB + ADDARN $AR0, $IX0 + + LRI $IX0, #0x1000 + IFLNZ + ADDARN $AR0, $IX0 + + LRI $IX0, #0x2000 + IFLZ + ADDARN $AR0, $IX0 + + LRI $IX0, #0x4000 + IFO + ADDARN $AR0, $IX0 + + LRI $IX0, #0x8000 + IF ; Always true + ADDARN $AR0, $IX0 + + CALL send_back + RET diff --git a/Source/DSPSpy/tests/less_test.ds b/Source/DSPSpy/tests/less_test.ds deleted file mode 100644 index 20be209b73..0000000000 --- a/Source/DSPSpy/tests/less_test.ds +++ /dev/null @@ -1,17 +0,0 @@ -incdir "tests" -include "dsp_base.inc" - -test_main: - CLR $acc0 - CLR $acc1 - LRI $ac0.h, #0x0050 - LRI $ac1.h, #0x0050 - ADD $acc0, $acc1 ; Causes acc0 to overflow, and thus also become negative - - LRI $AX0.L, #0x0000 - IFL - LRI $AX0.L, #0x0001 - CALL send_back - -; We're done, DO NOT DELETE THIS LINE - JMP end_of_test