JIT compiler:

* Improved constant folding/propagation in integer instructions: when every source operand is a known immediate, the result is now computed at compile time instead of emitting host arithmetic (see the first sketch below)
* Merged the boolean register instructions (andx, andcx, orx, orcx, norx, nandx, xorx, eqvx) into a single boolX handler (see the second sketch below)
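
A minimal standalone sketch of the first change, using a hypothetical, simplified register cache (GuestReg, TryFoldAdd and the 32-entry array are illustrative only, not the real Jit64/GPRRegCache API): if every source operand of an integer op is a known immediate at JIT time, the result is computed in the compiler and stored back as an immediate, so nothing is emitted for that instruction.

#include <cstdint>
#include <cstdio>
#include <optional>

struct GuestReg                          // illustrative register-cache entry
{
    std::optional<uint32_t> imm;         // set when the guest register's value is known
};

// Fold rD = rA + rB when both inputs are immediates; return false to signal that
// the caller must fall back to emitting real host arithmetic instead.
static bool TryFoldAdd(GuestReg regs[32], int d, int a, int b)
{
    if (regs[a].imm && regs[b].imm)
    {
        regs[d].imm = *regs[a].imm + *regs[b].imm;  // wraps mod 2^32, like the hardware
        return true;                                // no code emitted for this instruction
    }
    return false;
}

int main()
{
    GuestReg regs[32] = {};
    regs[3].imm = 40;
    regs[4].imm = 2;
    if (TryFoldAdd(regs, 5, 3, 4))
        std::printf("r5 folded to %u\n", static_cast<unsigned>(*regs[5].imm));  // prints 42
    return 0;
}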
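
And a sketch of the second change: the eight boolean register ops now share one handler that dispatches on the extended opcode (SUBOP10). EvalBool is a hypothetical helper, not part of the commit; it only mirrors the semantics the merged handler applies on both the immediate-folding path and the emitted x86, with SUBOP10 values matching the table31 entries in the diff below.

#include <cstdint>
#include <cstdio>

// Semantics of the eight boolean ops, keyed by SUBOP10 as in table31.
static uint32_t EvalBool(unsigned subop10, uint32_t s, uint32_t b)
{
    switch (subop10)
    {
    case 28:  return s & b;        // andx
    case 476: return ~(s & b);     // nandx
    case 60:  return s & ~b;       // andcx
    case 444: return s | b;        // orx
    case 124: return ~(s | b);     // norx
    case 412: return s | ~b;       // orcx
    case 316: return s ^ b;        // xorx
    case 284: return ~(s ^ b);     // eqvx
    default:  return 0;            // not reached for the boolean table31 entries
    }
}

int main()
{
    // eqvx of a value with itself is all ones, which is why the merged handler can
    // set rA = 0xFFFFFFFF directly when RS == RB.
    std::printf("%08x\n", static_cast<unsigned>(EvalBool(284, 0x1234u, 0x1234u)));
    return 0;
}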

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6063 8ced0084-cf51-0410-be5f-012b33b47a6e
dok.slade 2010-08-06 19:35:40 +00:00
parent 05401aeb0e
commit 1faff6aeb7
3 changed files with 673 additions and 480 deletions

View File

@ -170,14 +170,6 @@ public:
void addx(UGeckoInstruction inst);
void addcx(UGeckoInstruction inst);
void orx(UGeckoInstruction inst);
void orcx(UGeckoInstruction inst);
void norx(UGeckoInstruction inst);
void xorx(UGeckoInstruction inst);
void eqvx(UGeckoInstruction inst);
void andx(UGeckoInstruction inst);
void nandx(UGeckoInstruction inst);
void andcx(UGeckoInstruction inst);
void mulli(UGeckoInstruction inst);
void mulhwux(UGeckoInstruction inst);
void mullwx(UGeckoInstruction inst);
@ -209,6 +201,7 @@ public:
void mcrf(UGeckoInstruction inst);
void mcrxr(UGeckoInstruction inst);
void boolX(UGeckoInstruction inst);
void crXXX(UGeckoInstruction inst);
void reg_imm(UGeckoInstruction inst);

View File

@ -193,14 +193,14 @@ static GekkoOPTemplate table19[] =
static GekkoOPTemplate table31[] =
{
{28, &Jit64::andx}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{60, &Jit64::andcx}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{444, &Jit64::orx}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{124, &Jit64::norx}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{316, &Jit64::xorx}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{412, &Jit64::orcx}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{476, &Jit64::nandx}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{284, &Jit64::eqvx}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{28, &Jit64::boolX}, //"andx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{60, &Jit64::boolX}, //"andcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{444, &Jit64::boolX}, //"orx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{124, &Jit64::boolX}, //"norx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{316, &Jit64::boolX}, //"xorx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{412, &Jit64::boolX}, //"orcx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{476, &Jit64::boolX}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{284, &Jit64::boolX}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
{0, &Jit64::cmpXX}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{32, &Jit64::cmpXX}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
{26, &Jit64::cntlzwx}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},

View File

@ -76,20 +76,17 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
gpr.Lock(d, a);
if (a || binary || carry) // yeh nasty special case addic
{
if (a == d)
if (gpr.R(a).IsImm() && !carry)
{
if (gpr.R(d).IsImm() && !carry)
{
gpr.SetImmediate32(d, doop((u32)gpr.R(d).offset, value));
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
}
else
else if (a == d)
{
gpr.KillImmediate(d, true, true);
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
if (carry)
GenerateCarry();
}
}
else
{
gpr.BindToRegister(d, false);
@ -115,7 +112,6 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
}
if (Rc)
{
// Todo - special case immediates.
ComputeRC(gpr.R(d));
}
gpr.UnlockAll();
@ -184,8 +180,8 @@ void Jit64::reg_imm(UGeckoInstruction inst)
case 29: regimmop(a, s, true, inst.UIMM << 16, And, &XEmitter::AND, true); break;
case 26: regimmop(a, s, true, inst.UIMM, Xor, &XEmitter::XOR, false); break; //xori
case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, &XEmitter::XOR, false); break; //xoris
case 12: //regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, false, true); //addic
case 13: //regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, XEmitter::ADD, true, true); //addic_rc
case 12: regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, false, true); break; //addic
case 13: regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, &XEmitter::ADD, true, true); break; //addic_rc
default:
Default(inst);
break;
@ -216,47 +212,130 @@ void Jit64::cmpXX(UGeckoInstruction inst)
}
}
Gen::CCFlags less_than, greater_than;
OpArg comparand;
bool signedCompare;
if (inst.OPCD == 31) {
// cmp / cmpl
gpr.Lock(a, b);
gpr.BindToRegister(a, true, false);
comparand = gpr.R(b);
if (inst.SUBOP10 == 32) {
//cmpl
less_than = CC_B;
greater_than = CC_A;
} else {
//cmp
less_than = CC_L;
greater_than = CC_G;
}
signedCompare = (inst.SUBOP10 == 0);
}
else {
gpr.Lock(a);
gpr.KillImmediate(a, true, false); // todo, optimize instead, but unlikely to make a difference
if (inst.OPCD == 10) {
//cmpli
less_than = CC_B;
greater_than = CC_A;
comparand = Imm32(inst.UIMM);
comparand = Imm32((u32)inst.UIMM);
signedCompare = false;
} else if (inst.OPCD == 11) {
//cmpi
less_than = CC_L;
greater_than = CC_G;
comparand = Imm32((s32)(s16)inst.UIMM);
comparand = Imm32((u32)(s32)(s16)inst.UIMM);
signedCompare = true;
} else {
PanicAlert("cmpXX");
less_than = CC_O;
greater_than = CC_O;
comparand = Imm32(0);
}
}
if (gpr.R(a).IsImm() && comparand.IsImm())
{
// Both registers contain immediate values, so we can pre-compile the compare result
u8 compareResult;
if (signedCompare)
{
if ((s32)gpr.R(a).offset == (s32)comparand.offset)
compareResult = 0x2;
else if ((s32)gpr.R(a).offset > (s32)comparand.offset)
compareResult = 0x4;
else
compareResult = 0x8;
}
else
{
if ((u32)gpr.R(a).offset == (u32)comparand.offset)
compareResult = 0x2;
else if ((u32)gpr.R(a).offset > (u32)comparand.offset)
compareResult = 0x4;
else
compareResult = 0x8;
}
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(compareResult));
gpr.UnlockAll();
if (merge_branch)
{
js.downcountAmount++;
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
int test_bit = 8 >> (js.next_inst.BI & 3);
u8 conditionResult = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? test_bit : 0;
if ((compareResult & test_bit) == conditionResult)
{
if (js.next_inst.OPCD == 16) // bcx
{
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
u32 destination;
if (js.next_inst.AA)
destination = SignExt16(js.next_inst.BD << 2);
else
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
WriteExit(destination, 0);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
{
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
MOV(32, R(EAX), M(&CTR));
AND(32, R(EAX), Imm32(0xFFFFFFFC));
WriteExitDestInEAX(0);
}
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
{
MOV(32, R(EAX), M(&LR));
if (js.next_inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
WriteExitDestInEAX(0);
}
else
{
PanicAlert("WTF invalid branch");
}
}
else
{
WriteExit(js.next_compilerPC + 4, 0);
}
js.cancel = true;
}
}
else
{
Gen::CCFlags less_than, greater_than;
if (signedCompare)
{
less_than = CC_L;
greater_than = CC_G;
}
else
{
less_than = CC_B;
greater_than = CC_A;
}
if (gpr.R(a).IsImm() || (!gpr.R(a).IsSimpleReg() && !comparand.IsImm() && !comparand.IsSimpleReg()))
{
// Syntax for CMP is invalid with such arguments. We must load RA in a register.
gpr.BindToRegister(a, true, false);
}
CMP(32, gpr.R(a), comparand);
gpr.UnlockAll();
if (!merge_branch)
{
// Keep the normal code separate for clarity.
CMP(32, gpr.R(a), comparand);
FixupBranch pLesser = J_CC(less_than);
FixupBranch pGreater = J_CC(greater_than);
@ -275,8 +354,6 @@ void Jit64::cmpXX(UGeckoInstruction inst)
js.downcountAmount++;
int test_bit = 8 >> (js.next_inst.BI & 3);
bool condition = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? false : true;
CMP(32, gpr.R(a), comparand);
gpr.UnlockAll();
// Test swapping (in the future, will be used to inline across branches the right way)
// if (rand() & 1)
@ -340,151 +417,198 @@ void Jit64::cmpXX(UGeckoInstruction inst)
js.cancel = true;
}
}
gpr.UnlockAll();
}
void Jit64::orx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA;
int s = inst.RS;
int b = inst.RB;
if (s == b && s != a)
{
gpr.Lock(a,s);
gpr.BindToRegister(a, false);
MOV(32, gpr.R(a), gpr.R(s));
gpr.UnlockAll();
}
else
{
gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
if (a == s)
OR(32, gpr.R(a), gpr.R(b));
else if (a == b)
OR(32, gpr.R(a), gpr.R(s));
else {
MOV(32, gpr.R(a), gpr.R(b));
OR(32, gpr.R(a), gpr.R(s));
}
gpr.UnlockAll();
}
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
}
void Jit64::orcx(UGeckoInstruction inst)
void Jit64::boolX(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, s = inst.RS, b = inst.RB;
gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
MOV(32, R(EAX), gpr.R(b));
NOT(32, R(EAX));
OR(32, R(EAX), gpr.R(s));
MOV(32, gpr.R(a), R(EAX));
gpr.UnlockAll();
_dbg_assert_msg_(DYNA_REC, inst.OPCD == 31, "Invalid boolX");
if (inst.Rc) {
// result is already in eax
ComputeRC(R(EAX));
if (gpr.R(s).IsImm() && gpr.R(b).IsImm())
{
if (inst.SUBOP10 == 28) /* andx */
gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (u32)gpr.R(b).offset);
else if (inst.SUBOP10 == 476) /* nandx */
gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset & (u32)gpr.R(b).offset));
else if (inst.SUBOP10 == 60) /* andcx */
gpr.SetImmediate32(a, (u32)gpr.R(s).offset & (~(u32)gpr.R(b).offset));
else if (inst.SUBOP10 == 444) /* orx */
gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (u32)gpr.R(b).offset);
else if (inst.SUBOP10 == 124) /* norx */
gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset | (u32)gpr.R(b).offset));
else if (inst.SUBOP10 == 412) /* orcx */
gpr.SetImmediate32(a, (u32)gpr.R(s).offset | (~(u32)gpr.R(b).offset));
else if (inst.SUBOP10 == 316) /* xorx */
gpr.SetImmediate32(a, (u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset);
else if (inst.SUBOP10 == 284) /* eqvx */
gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset));
}
}
void Jit64::norx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA;
int s = inst.RS;
int b = inst.RB;
if (s == b && s != a)
else if (s == b)
{
if ((inst.SUBOP10 == 28 /* andx */) || (inst.SUBOP10 == 444 /* orx */))
{
if (a != s)
{
gpr.Lock(a,s);
gpr.BindToRegister(a, false);
gpr.BindToRegister(a, false, true);
MOV(32, gpr.R(a), gpr.R(s));
NOT(32, gpr.R(a));
gpr.UnlockAll();
}
}
else if ((inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */))
{
if (a != s)
{
gpr.Lock(a,s);
gpr.BindToRegister(a, false, true);
MOV(32, gpr.R(a), gpr.R(s));
}
else
{
gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
if (a == s)
OR(32, gpr.R(a), gpr.R(b));
else if (a == b)
OR(32, gpr.R(a), gpr.R(s));
else {
MOV(32, gpr.R(a), gpr.R(b));
OR(32, gpr.R(a), gpr.R(s));
gpr.KillImmediate(a, true, true);
}
NOT(32, gpr.R(a));
gpr.UnlockAll();
}
if (inst.Rc)
else if ((inst.SUBOP10 == 412 /* orcx */) || (inst.SUBOP10 == 284 /* eqvx */))
{
ComputeRC(gpr.R(a));
gpr.SetImmediate32(a, 0xFFFFFFFF);
}
}
// m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB];
void Jit64::xorx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA;
int s = inst.RS;
int b = inst.RB;
if (s == b) {
else if ((inst.SUBOP10 == 60 /* andcx */) || (inst.SUBOP10 == 316 /* xorx */))
{
gpr.SetImmediate32(a, 0);
}
else
{
gpr.Lock(a, s, b);
gpr.BindToRegister(a, a == s || a == b, true);
MOV(32, R(EAX), gpr.R(s));
XOR(32, R(EAX), gpr.R(b));
MOV(32, gpr.R(a), R(EAX));
gpr.UnlockAll();
PanicAlert("WTF!");
}
if (inst.Rc)
}
else if ((a == s) || (a == b))
{
ComputeRC(gpr.R(a));
gpr.Lock(a,((a == s) ? b : s));
OpArg operand = ((a == s) ? gpr.R(b) : gpr.R(s));
gpr.BindToRegister(a, true, true);
if (inst.SUBOP10 == 28) /* andx */
{
AND(32, gpr.R(a), operand);
}
}
void Jit64::eqvx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA;
int s = inst.RS;
int b = inst.RB;
if (s == b) {
gpr.SetImmediate32(a, 0);
else if (inst.SUBOP10 == 476) /* nandx */
{
AND(32, gpr.R(a), operand);
NOT(32, gpr.R(a));
}
else if (inst.SUBOP10 == 60) /* andcx */
{
if (a == b)
{
NOT(32, gpr.R(a));
AND(32, gpr.R(a), operand);
}
else
{
gpr.Lock(a, s, b);
gpr.BindToRegister(a, a == s || a == b, true);
MOV(32, R(EAX), gpr.R(s));
XOR(32, R(EAX), gpr.R(b));
MOV(32, R(EAX), operand);
NOT(32, R(EAX));
MOV(32, gpr.R(a), R(EAX));
AND(32, gpr.R(a), R(EAX));
}
}
else if (inst.SUBOP10 == 444) /* orx */
{
OR(32, gpr.R(a), operand);
}
else if (inst.SUBOP10 == 124) /* norx */
{
OR(32, gpr.R(a), operand);
NOT(32, gpr.R(a));
}
else if (inst.SUBOP10 == 412) /* orcx */
{
if (a == b)
{
NOT(32, gpr.R(a));
OR(32, gpr.R(a), operand);
}
else
{
MOV(32, R(EAX), operand);
NOT(32, R(EAX));
OR(32, gpr.R(a), R(EAX));
}
}
else if (inst.SUBOP10 == 316) /* xorx */
{
XOR(32, gpr.R(a), operand);
}
else if (inst.SUBOP10 == 284) /* eqvx */
{
XOR(32, gpr.R(a), operand);
NOT(32, gpr.R(a));
}
else
{
PanicAlert("WTF");
}
gpr.UnlockAll();
}
else
{
gpr.Lock(a,s,b);
gpr.BindToRegister(a, false, true);
if (inst.SUBOP10 == 28) /* andx */
{
MOV(32, gpr.R(a), gpr.R(s));
AND(32, gpr.R(a), gpr.R(b));
}
else if (inst.SUBOP10 == 476) /* nandx */
{
MOV(32, gpr.R(a), gpr.R(s));
AND(32, gpr.R(a), gpr.R(b));
NOT(32, gpr.R(a));
}
else if (inst.SUBOP10 == 60) /* andcx */
{
MOV(32, gpr.R(a), gpr.R(b));
NOT(32, gpr.R(a));
AND(32, gpr.R(a), gpr.R(s));
}
else if (inst.SUBOP10 == 444) /* orx */
{
MOV(32, gpr.R(a), gpr.R(s));
OR(32, gpr.R(a), gpr.R(b));
}
else if (inst.SUBOP10 == 124) /* norx */
{
MOV(32, gpr.R(a), gpr.R(s));
OR(32, gpr.R(a), gpr.R(b));
NOT(32, gpr.R(a));
}
else if (inst.SUBOP10 == 412) /* orcx */
{
MOV(32, gpr.R(a), gpr.R(b));
NOT(32, gpr.R(a));
OR(32, gpr.R(a), gpr.R(s));
}
else if (inst.SUBOP10 == 316) /* xorx */
{
MOV(32, gpr.R(a), gpr.R(s));
XOR(32, gpr.R(a), gpr.R(b));
}
else if (inst.SUBOP10 == 284) /* eqvx */
{
MOV(32, gpr.R(a), gpr.R(s));
XOR(32, gpr.R(a), gpr.R(b));
NOT(32, gpr.R(a));
}
else
{
PanicAlert("WTF!");
}
gpr.UnlockAll();
}
@ -494,67 +618,18 @@ void Jit64::eqvx(UGeckoInstruction inst)
}
}
void Jit64::andx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, s = inst.RS, b = inst.RB;
gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
MOV(32, R(EAX), gpr.R(s));
AND(32, R(EAX), gpr.R(b));
MOV(32, gpr.R(a), R(EAX));
gpr.UnlockAll();
if (inst.Rc) {
// result is already in eax
ComputeRC(R(EAX));
}
}
void Jit64::nandx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, s = inst.RS, b = inst.RB;
gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
MOV(32, R(EAX), gpr.R(s));
AND(32, R(EAX), gpr.R(b));
NOT(32, R(EAX));
MOV(32, gpr.R(a), R(EAX));
gpr.UnlockAll();
if (inst.Rc) {
// result is already in eax
ComputeRC(R(EAX));
}
}
void Jit64::andcx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, s = inst.RS, b = inst.RB;
gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true);
MOV(32, R(EAX), gpr.R(b));
NOT(32, R(EAX));
AND(32, R(EAX), gpr.R(s));
MOV(32, gpr.R(a), R(EAX));
gpr.UnlockAll();
if (inst.Rc) {
// result is already in eax
ComputeRC(R(EAX));
}
}
void Jit64::extsbx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, s = inst.RS;
if (gpr.R(s).IsImm())
{
gpr.SetImmediate32(a, (u32)(s32)(s8)gpr.R(s).offset);
}
else
{
gpr.Lock(a, s);
gpr.BindToRegister(a, a == s, true);
// Always force moving to EAX because it isn't possible
@ -562,10 +637,13 @@ void Jit64::extsbx(UGeckoInstruction inst)
// 32-bit mode.
MOV(32, R(EAX), gpr.R(s));
MOVSX(32, 8, gpr.RX(a), R(AL)); // watch out for ah and friends
if (inst.Rc) {
gpr.UnlockAll();
}
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
gpr.UnlockAll();
}
void Jit64::extshx(UGeckoInstruction inst)
@ -573,6 +651,13 @@ void Jit64::extshx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, s = inst.RS;
if (gpr.R(s).IsImm())
{
gpr.SetImmediate32(a, (u32)(s32)(s16)gpr.R(s).offset);
}
else
{
gpr.Lock(a, s);
gpr.KillImmediate(s, true, false);
gpr.BindToRegister(a, a == s, true);
@ -580,10 +665,13 @@ void Jit64::extshx(UGeckoInstruction inst)
// every 32-bit register has a 16-bit half at the same index
// as the 32-bit register.
MOVSX(32, 16, gpr.RX(a), gpr.R(s));
if (inst.Rc) {
gpr.UnlockAll();
}
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
gpr.UnlockAll();
}
void Jit64::subfic(UGeckoInstruction inst)
@ -752,16 +840,25 @@ void Jit64::subfx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
{
gpr.SetImmediate32(d, (u32)gpr.R(b).offset - (u32)gpr.R(a).offset);
}
else
{
gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true);
MOV(32, R(EAX), gpr.R(b));
SUB(32, R(EAX), gpr.R(a));
MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll();
}
if (inst.OE) PanicAlert("OE: subfx");
if (inst.Rc) {
// result is already in eax
ComputeRC(R(EAX));
if (inst.Rc)
{
ComputeRC(gpr.R(d));
}
}
@ -770,11 +867,19 @@ void Jit64::mulli(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, d = inst.RD;
if (gpr.R(a).IsImm())
{
gpr.SetImmediate32(d, (s32)gpr.R(a).offset * (s32)inst.SIMM_16);
}
else
{
gpr.Lock(a, d);
gpr.BindToRegister(d, (d == a), true);
gpr.KillImmediate(a, true, false);
IMUL(32, gpr.RX(d), gpr.R(a), Imm32((u32)(s32)inst.SIMM_16));
gpr.UnlockAll();
}
}
void Jit64::mullwx(UGeckoInstruction inst)
@ -782,6 +887,13 @@ void Jit64::mullwx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
{
gpr.SetImmediate32(d, (s32)gpr.R(a).offset * (s32)gpr.R(b).offset);
}
else
{
gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true);
if (d == a) {
@ -793,7 +905,10 @@ void Jit64::mullwx(UGeckoInstruction inst)
IMUL(32, gpr.RX(d), gpr.R(a));
}
gpr.UnlockAll();
if (inst.Rc) {
}
if (inst.Rc)
{
ComputeRC(gpr.R(d));
}
}
@ -803,6 +918,13 @@ void Jit64::mulhwux(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
{
gpr.SetImmediate32(d, (u32)(((u64)gpr.R(a).offset * (u64)gpr.R(b).offset) >> 32));
}
else
{
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true);
@ -814,8 +936,10 @@ void Jit64::mulhwux(UGeckoInstruction inst)
gpr.UnlockAll();
gpr.UnlockAllX();
MOV(32, gpr.R(d), R(EDX));
}
if (inst.Rc)
ComputeRC(R(EDX));
ComputeRC(gpr.R(d));
}
void Jit64::divwux(UGeckoInstruction inst)
@ -823,6 +947,16 @@ void Jit64::divwux(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(Integer)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
{
if( gpr.R(b).offset == 0 )
gpr.SetImmediate32(d, 0);
else
gpr.SetImmediate32(d, (u32)gpr.R(a).offset / (u32)gpr.R(b).offset);
}
else
{
gpr.FlushLockX(EDX);
gpr.Lock(a, b, d);
gpr.BindToRegister(d, (d == a || d == b), true);
@ -841,8 +975,11 @@ void Jit64::divwux(UGeckoInstruction inst)
SetJumpTarget(end);
gpr.UnlockAll();
gpr.UnlockAllX();
if (inst.Rc) {
ComputeRC(R(EAX));
}
if (inst.Rc)
{
ComputeRC(gpr.R(d));
}
}
@ -853,7 +990,11 @@ void Jit64::addx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
_assert_msg_(DYNA_REC, !inst.OE, "Add - OE enabled :(");
if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
{
gpr.SetImmediate32(d, (u32)gpr.R(a).offset + (u32)gpr.R(b).offset);
}
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
{
gpr.Lock(a, b, d);
gpr.BindToRegister(d, false);
@ -1026,16 +1167,16 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
JITDISABLE(Integer)
int a = inst.RA;
int s = inst.RS;
if (gpr.R(s).IsImm() && !inst.Rc)
if (gpr.R(s).IsImm())
{
unsigned result = (int)gpr.R(s).offset;
if (inst.SH != 0)
result = _rotl(result, inst.SH);
result &= Helper_Mask(inst.MB, inst.ME);
gpr.SetImmediate32(a, result);
return;
}
else
{
gpr.Lock(a, s);
gpr.BindToRegister(a, a == s);
if (a != s)
@ -1067,6 +1208,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
_assert_msg_(DYNA_REC, written, "W T F!!!");
}
gpr.UnlockAll();
}
if (inst.Rc)
{
@ -1082,6 +1224,13 @@ void Jit64::rlwimix(UGeckoInstruction inst)
int a = inst.RA;
int s = inst.RS;
if (gpr.R(a).IsImm() && gpr.R(s).IsImm())
{
u32 mask = Helper_Mask(inst.MB,inst.ME);
gpr.SetImmediate32(a, ((u32)gpr.R(a).offset & ~mask) | (_rotl((u32)gpr.R(s).offset,inst.SH) & mask));
}
else
{
gpr.Lock(a, s);
gpr.KillImmediate(a, true, true);
u32 mask = Helper_Mask(inst.MB, inst.ME);
@ -1092,6 +1241,8 @@ void Jit64::rlwimix(UGeckoInstruction inst)
AND(32, R(EAX), Imm32(mask));
OR(32, gpr.R(a), R(EAX));
gpr.UnlockAll();
}
if (inst.Rc)
{
ComputeRC(gpr.R(a));
@ -1105,6 +1256,12 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, s = inst.RS;
u32 mask = Helper_Mask(inst.MB, inst.ME);
if (gpr.R(b).IsImm() && gpr.R(s).IsImm())
{
gpr.SetImmediate32(a, _rotl((u32)gpr.R(s).offset, (u32)gpr.R(b).offset & 0x1F) & mask);
}
else
{
gpr.FlushLockX(ECX);
gpr.Lock(a, b, s);
gpr.KillImmediate(a, (a == s || a == b), true);
@ -1116,9 +1273,11 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
MOV(32, gpr.R(a), R(EAX));
gpr.UnlockAll();
gpr.UnlockAllX();
}
if (inst.Rc)
{
ComputeRC(R(EAX));
ComputeRC(gpr.R(a));
}
}
@ -1128,12 +1287,21 @@ void Jit64::negx(UGeckoInstruction inst)
JITDISABLE(Integer)
int a = inst.RA;
int d = inst.RD;
if (gpr.R(a).IsImm())
{
gpr.SetImmediate32(d, ~((u32)gpr.R(a).offset) + 1);
}
else
{
gpr.Lock(a, d);
gpr.BindToRegister(d, a == d, true);
if (a != d)
MOV(32, gpr.R(d), gpr.R(a));
NEG(32, gpr.R(d));
gpr.UnlockAll();
}
if (inst.Rc)
{
ComputeRC(gpr.R(d));
@ -1147,6 +1315,14 @@ void Jit64::srwx(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;
int s = inst.RS;
if (gpr.R(b).IsImm() && gpr.R(s).IsImm())
{
u32 amount = (u32)gpr.R(b).offset;
gpr.SetImmediate32(a, (amount & 0x20) ? 0 : ((u32)gpr.R(s).offset >> (amount & 0x1f)));
}
else
{
gpr.FlushLockX(ECX);
gpr.Lock(a, b, s);
gpr.BindToRegister(a, a == s || a == b || s == b, true);
@ -1160,9 +1336,11 @@ void Jit64::srwx(UGeckoInstruction inst)
MOV(32, gpr.R(a), R(EAX));
gpr.UnlockAll();
gpr.UnlockAllX();
}
if (inst.Rc)
{
ComputeRC(R(EAX));
ComputeRC(gpr.R(a));
}
}
@ -1173,6 +1351,14 @@ void Jit64::slwx(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;
int s = inst.RS;
if (gpr.R(b).IsImm() && gpr.R(s).IsImm())
{
u32 amount = (u32)gpr.R(b).offset;
gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (u32)gpr.R(s).offset << amount);
}
else
{
gpr.FlushLockX(ECX);
gpr.Lock(a, b, s);
gpr.BindToRegister(a, a == s || a == b || s == b, true);
@ -1186,9 +1372,11 @@ void Jit64::slwx(UGeckoInstruction inst)
MOV(32, gpr.R(a), R(EAX));
gpr.UnlockAll();
gpr.UnlockAllX();
}
if (inst.Rc)
{
ComputeRC(R(EAX));
ComputeRC(gpr.R(a));
}
}
@ -1286,6 +1474,17 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
int a = inst.RA;
int s = inst.RS;
if (gpr.R(s).IsImm())
{
u32 mask = 0x80000000;
u32 i = 0;
for (; i < 32; i++, mask >>= 1)
if ((u32)gpr.R(s).offset & mask)
break;
gpr.SetImmediate32(a, i);
}
else
{
gpr.Lock(a, s);
gpr.KillImmediate(s, true, false);
gpr.BindToRegister(a, (a == s), true);
@ -1295,6 +1494,7 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
SetJumpTarget(gotone);
XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
gpr.UnlockAll();
}
if (inst.Rc)
{