JitArm64: Restructure the BackPatchInfo flags enum

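The access size (FLAG_SIZE_8/16/32/64) is now expressed independently of the
new FLAG_FLOAT and FLAG_PAIR flags, replacing the combined FLAG_SIZE_F32,
FLAG_SIZE_F32X2 and FLAG_SIZE_F64 values and the FLAG_MASK_FLOAT mask.
This makes it possible to construct flag combinations like
"load 16 bits into an FPR".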
This commit is contained in:
JosJuice 2021-07-08 17:53:01 +02:00
parent 059794b551
commit 36b3d51523
4 changed files with 52 additions and 51 deletions

View File

@ -58,14 +58,14 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
if (fastmem) if (fastmem)
{ {
if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT) if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
{ {
if (flags & BackPatchInfo::FLAG_SIZE_F32) if ((flags & BackPatchInfo::FLAG_SIZE_32) && !(flags & BackPatchInfo::FLAG_PAIR))
{ {
m_float_emit.REV32(8, ARM64Reg::D0, RS); m_float_emit.REV32(8, ARM64Reg::D0, RS);
m_float_emit.STR(32, ARM64Reg::D0, MEM_REG, addr); m_float_emit.STR(32, ARM64Reg::D0, MEM_REG, addr);
} }
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2) else if ((flags & BackPatchInfo::FLAG_SIZE_32) && (flags & BackPatchInfo::FLAG_PAIR))
{ {
m_float_emit.REV32(8, ARM64Reg::D0, RS); m_float_emit.REV32(8, ARM64Reg::D0, RS);
m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr); m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
@ -76,9 +76,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr); m_float_emit.STR(64, ARM64Reg::Q0, MEM_REG, addr);
} }
} }
else if (flags & BackPatchInfo::FLAG_LOAD && flags & BackPatchInfo::FLAG_MASK_FLOAT) else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
{ {
if (flags & BackPatchInfo::FLAG_SIZE_F32) if (flags & BackPatchInfo::FLAG_SIZE_32)
{ {
m_float_emit.LDR(32, EncodeRegToDouble(RS), MEM_REG, addr); m_float_emit.LDR(32, EncodeRegToDouble(RS), MEM_REG, addr);
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS)); m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
@ -158,15 +158,15 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
ABI_PushRegisters(gprs_to_push); ABI_PushRegisters(gprs_to_push);
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30); m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
if (flags & BackPatchInfo::FLAG_STORE && flags & BackPatchInfo::FLAG_MASK_FLOAT) if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
{ {
if (flags & BackPatchInfo::FLAG_SIZE_F32) if ((flags & BackPatchInfo::FLAG_SIZE_32) && !(flags & BackPatchInfo::FLAG_PAIR))
{ {
m_float_emit.UMOV(32, ARM64Reg::W0, RS, 0); m_float_emit.UMOV(32, ARM64Reg::W0, RS, 0);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32); MOVP2R(ARM64Reg::X8, &PowerPC::Write_U32);
BLR(ARM64Reg::X8); BLR(ARM64Reg::X8);
} }
else if (flags & BackPatchInfo::FLAG_SIZE_F32X2) else if ((flags & BackPatchInfo::FLAG_SIZE_32) && (flags & BackPatchInfo::FLAG_PAIR))
{ {
m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0); m_float_emit.UMOV(64, ARM64Reg::X0, RS, 0);
MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64); MOVP2R(ARM64Reg::X8, &PowerPC::Write_U64);
@ -180,9 +180,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode, AR
BLR(ARM64Reg::X8); BLR(ARM64Reg::X8);
} }
} }
else if (flags & BackPatchInfo::FLAG_LOAD && flags & BackPatchInfo::FLAG_MASK_FLOAT) else if ((flags & BackPatchInfo::FLAG_LOAD) && (flags & BackPatchInfo::FLAG_FLOAT))
{ {
if (flags & BackPatchInfo::FLAG_SIZE_F32) if (flags & BackPatchInfo::FLAG_SIZE_32)
{ {
MOVP2R(ARM64Reg::X8, &PowerPC::Read_U32); MOVP2R(ARM64Reg::X8, &PowerPC::Read_U32);
BLR(ARM64Reg::X8); BLR(ARM64Reg::X8);

View File

@ -26,7 +26,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
u32 a = inst.RA, b = inst.RB; u32 a = inst.RA, b = inst.RB;
s32 offset = inst.SIMM_16; s32 offset = inst.SIMM_16;
u32 flags = BackPatchInfo::FLAG_LOAD; u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT;
bool update = false; bool update = false;
s32 offset_reg = -1; s32 offset_reg = -1;
@ -36,38 +36,38 @@ void JitArm64::lfXX(UGeckoInstruction inst)
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {
case 567: // lfsux case 567: // lfsux
flags |= BackPatchInfo::FLAG_SIZE_F32; flags |= BackPatchInfo::FLAG_SIZE_32;
update = true; update = true;
offset_reg = b; offset_reg = b;
break; break;
case 535: // lfsx case 535: // lfsx
flags |= BackPatchInfo::FLAG_SIZE_F32; flags |= BackPatchInfo::FLAG_SIZE_32;
offset_reg = b; offset_reg = b;
break; break;
case 631: // lfdux case 631: // lfdux
flags |= BackPatchInfo::FLAG_SIZE_F64; flags |= BackPatchInfo::FLAG_SIZE_64;
update = true; update = true;
offset_reg = b; offset_reg = b;
break; break;
case 599: // lfdx case 599: // lfdx
flags |= BackPatchInfo::FLAG_SIZE_F64; flags |= BackPatchInfo::FLAG_SIZE_64;
offset_reg = b; offset_reg = b;
break; break;
} }
break; break;
case 49: // lfsu case 49: // lfsu
flags |= BackPatchInfo::FLAG_SIZE_F32; flags |= BackPatchInfo::FLAG_SIZE_32;
update = true; update = true;
break; break;
case 48: // lfs case 48: // lfs
flags |= BackPatchInfo::FLAG_SIZE_F32; flags |= BackPatchInfo::FLAG_SIZE_32;
break; break;
case 51: // lfdu case 51: // lfdu
flags |= BackPatchInfo::FLAG_SIZE_F64; flags |= BackPatchInfo::FLAG_SIZE_64;
update = true; update = true;
break; break;
case 50: // lfd case 50: // lfd
flags |= BackPatchInfo::FLAG_SIZE_F64; flags |= BackPatchInfo::FLAG_SIZE_64;
break; break;
} }
@ -75,7 +75,7 @@ void JitArm64::lfXX(UGeckoInstruction inst)
bool is_immediate = false; bool is_immediate = false;
const RegType type = const RegType type =
(flags & BackPatchInfo::FLAG_SIZE_F64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle; (flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;
gpr.Lock(ARM64Reg::W0, ARM64Reg::W30); gpr.Lock(ARM64Reg::W0, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0); fpr.Lock(ARM64Reg::Q0);
@ -190,7 +190,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
bool want_single = false; bool want_single = false;
s32 offset = inst.SIMM_16; s32 offset = inst.SIMM_16;
u32 flags = BackPatchInfo::FLAG_STORE; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT;
bool update = false; bool update = false;
s32 offset_reg = -1; s32 offset_reg = -1;
@ -201,46 +201,46 @@ void JitArm64::stfXX(UGeckoInstruction inst)
{ {
case 663: // stfsx case 663: // stfsx
want_single = true; want_single = true;
flags |= BackPatchInfo::FLAG_SIZE_F32; flags |= BackPatchInfo::FLAG_SIZE_32;
offset_reg = b; offset_reg = b;
break; break;
case 695: // stfsux case 695: // stfsux
want_single = true; want_single = true;
flags |= BackPatchInfo::FLAG_SIZE_F32; flags |= BackPatchInfo::FLAG_SIZE_32;
update = true; update = true;
offset_reg = b; offset_reg = b;
break; break;
case 727: // stfdx case 727: // stfdx
flags |= BackPatchInfo::FLAG_SIZE_F64; flags |= BackPatchInfo::FLAG_SIZE_64;
offset_reg = b; offset_reg = b;
break; break;
case 759: // stfdux case 759: // stfdux
flags |= BackPatchInfo::FLAG_SIZE_F64; flags |= BackPatchInfo::FLAG_SIZE_64;
update = true; update = true;
offset_reg = b; offset_reg = b;
break; break;
case 983: // stfiwx case 983: // stfiwx
// This instruction writes the lower 32 bits of a double. want_single must be false // This instruction writes the lower 32 bits of a double. want_single must be false
flags |= BackPatchInfo::FLAG_SIZE_F32; flags |= BackPatchInfo::FLAG_SIZE_32;
offset_reg = b; offset_reg = b;
break; break;
} }
break; break;
case 53: // stfsu case 53: // stfsu
want_single = true; want_single = true;
flags |= BackPatchInfo::FLAG_SIZE_F32; flags |= BackPatchInfo::FLAG_SIZE_32;
update = true; update = true;
break; break;
case 52: // stfs case 52: // stfs
want_single = true; want_single = true;
flags |= BackPatchInfo::FLAG_SIZE_F32; flags |= BackPatchInfo::FLAG_SIZE_32;
break; break;
case 55: // stfdu case 55: // stfdu
flags |= BackPatchInfo::FLAG_SIZE_F64; flags |= BackPatchInfo::FLAG_SIZE_64;
update = true; update = true;
break; break;
case 54: // stfd case 54: // stfd
flags |= BackPatchInfo::FLAG_SIZE_F64; flags |= BackPatchInfo::FLAG_SIZE_64;
break; break;
} }
@ -361,16 +361,16 @@ void JitArm64::stfXX(UGeckoInstruction inst)
if (jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)) if (jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr))
{ {
int accessSize; int accessSize;
if (flags & BackPatchInfo::FLAG_SIZE_F64) if (flags & BackPatchInfo::FLAG_SIZE_64)
accessSize = 64; accessSize = 64;
else else
accessSize = 32; accessSize = 32;
LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr)); LDR(IndexType::Unsigned, ARM64Reg::X0, PPC_REG, PPCSTATE_OFF(gather_pipe_ptr));
if (flags & BackPatchInfo::FLAG_SIZE_F64) if (flags & BackPatchInfo::FLAG_SIZE_64)
m_float_emit.REV64(8, ARM64Reg::Q0, V0); m_float_emit.REV64(8, ARM64Reg::Q0, V0);
else if (flags & BackPatchInfo::FLAG_SIZE_F32) else if (flags & BackPatchInfo::FLAG_SIZE_32)
m_float_emit.REV32(8, ARM64Reg::D0, V0); m_float_emit.REV32(8, ARM64Reg::D0, V0);
m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0, m_float_emit.STR(accessSize, IndexType::Post, accessSize == 64 ? ARM64Reg::Q0 : ARM64Reg::D0,

View File

@ -198,9 +198,9 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize) if (js.assumeNoPairedQuantize)
{ {
u32 flags = BackPatchInfo::FLAG_STORE; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
flags |= (w ? BackPatchInfo::FLAG_SIZE_F32 : BackPatchInfo::FLAG_SIZE_F32X2); flags |= BackPatchInfo::FLAG_PAIR;
EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use, EmitBackpatchRoutine(flags, jo.fastmem, jo.fastmem, VS, EncodeRegTo64(addr_reg), gprs_in_use,
fprs_in_use); fprs_in_use);

View File

@ -13,31 +13,32 @@ struct BackPatchInfo
FLAG_SIZE_8 = (1 << 2), FLAG_SIZE_8 = (1 << 2),
FLAG_SIZE_16 = (1 << 3), FLAG_SIZE_16 = (1 << 3),
FLAG_SIZE_32 = (1 << 4), FLAG_SIZE_32 = (1 << 4),
FLAG_SIZE_F32 = (1 << 5), FLAG_SIZE_64 = (1 << 5),
FLAG_SIZE_F32X2 = (1 << 6), FLAG_FLOAT = (1 << 6),
FLAG_SIZE_F64 = (1 << 7), FLAG_PAIR = (1 << 7),
FLAG_REVERSE = (1 << 8), FLAG_REVERSE = (1 << 8),
FLAG_EXTEND = (1 << 9), FLAG_EXTEND = (1 << 9),
FLAG_ZERO_256 = (1 << 10), FLAG_ZERO_256 = (1 << 10),
FLAG_MASK_FLOAT = FLAG_SIZE_F32 | FLAG_SIZE_F32X2 | FLAG_SIZE_F64,
}; };
static u32 GetFlagSize(u32 flags) static u32 GetFlagSize(u32 flags)
{ {
u32 size = 0;
if (flags & FLAG_SIZE_8) if (flags & FLAG_SIZE_8)
return 8; size = 8;
if (flags & FLAG_SIZE_16) if (flags & FLAG_SIZE_16)
return 16; size = 16;
if (flags & FLAG_SIZE_32) if (flags & FLAG_SIZE_32)
return 32; size = 32;
if (flags & FLAG_SIZE_F32) if (flags & FLAG_SIZE_64)
return 32; size = 64;
if (flags & FLAG_SIZE_F32X2)
return 64;
if (flags & FLAG_SIZE_F64)
return 64;
if (flags & FLAG_ZERO_256) if (flags & FLAG_ZERO_256)
return 256; size = 256;
return 0;
if (flags & FLAG_PAIR)
size *= 2;
return size;
} }
}; };