mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2025-07-26 23:59:54 -06:00
Merge pull request #1621 from FioraAeterna/pscleanup
JIT: cleanups/optimizations for ps loadstore
This commit is contained in:
@ -212,8 +212,6 @@ static const float GC_ALIGNED16(m_dequantizeTableS[]) =
|
|||||||
(1ULL << 4), (1ULL << 4), (1ULL << 3), (1ULL << 3), (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1),
|
(1ULL << 4), (1ULL << 4), (1ULL << 3), (1ULL << 3), (1ULL << 2), (1ULL << 2), (1ULL << 1), (1ULL << 1),
|
||||||
};
|
};
|
||||||
|
|
||||||
static float GC_ALIGNED16(psTemp[4]);
|
|
||||||
|
|
||||||
static const float GC_ALIGNED16(m_65535[4]) = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
|
static const float GC_ALIGNED16(m_65535[4]) = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
|
||||||
static const float GC_ALIGNED16(m_32767) = 32767.0f;
|
static const float GC_ALIGNED16(m_32767) = 32767.0f;
|
||||||
static const float GC_ALIGNED16(m_m32768) = -32768.0f;
|
static const float GC_ALIGNED16(m_m32768) = -32768.0f;
|
||||||
@ -230,36 +228,26 @@ static const float GC_ALIGNED16(m_one[]) = {1.0f, 0.0f, 0.0f, 0.0f};
|
|||||||
// I don't know whether the overflow actually happens in any games
|
// I don't know whether the overflow actually happens in any games
|
||||||
// but it potentially can cause problems, so we need some clamping
|
// but it potentially can cause problems, so we need some clamping
|
||||||
|
|
||||||
// Stores the first 64 bits of psTemp to emulated memory at `address`
// through Memory::Write_U64. psTemp is a float[4] scratch buffer, so those
// 64 bits are the raw bit patterns of psTemp[0] and psTemp[1].
// The old storePairedFloat path invokes this from generated code with
// ABI_CallFunctionR, passing the guest address in RSCRATCH_EXTRA.
// NOTE(review): the cast reads a float array through a u64 pointer, which
// is type punning -- confirm the build tolerates it (strict aliasing).
static void WriteDual32(u32 address)
|
|
||||||
{
|
|
||||||
Memory::Write_U64(*(u64 *) psTemp, address);
|
|
||||||
}
|
|
||||||
|
|
||||||
// See comment in header for in/outs.
|
// See comment in header for in/outs.
|
||||||
void CommonAsmRoutines::GenQuantizedStores()
|
void CommonAsmRoutines::GenQuantizedStores()
|
||||||
{
|
{
|
||||||
const u8* storePairedIllegal = AlignCode4();
|
const u8* storePairedIllegal = AlignCode4();
|
||||||
UD2();
|
UD2();
|
||||||
const u8* storePairedFloat = AlignCode4();
|
|
||||||
|
|
||||||
FixupBranch skip_complex, too_complex;
|
const u8* storePairedFloat = AlignCode4();
|
||||||
SHUFPS(XMM0, R(XMM0), 1);
|
if (cpu_info.bSSSE3)
|
||||||
MOVQ_xmm(M(&psTemp[0]), XMM0);
|
|
||||||
if (!jit->js.memcheck)
|
|
||||||
{
|
{
|
||||||
TEST(32, R(RSCRATCH_EXTRA), Imm32(0x0C000000));
|
PSHUFB(XMM0, M((void *)pbswapShuffle2x4));
|
||||||
too_complex = J_CC(CC_NZ, true);
|
MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||||
MOV(64, R(RSCRATCH), M(&psTemp[0]));
|
|
||||||
SwapAndStore(64, MComplex(RMEM, RSCRATCH_EXTRA, SCALE_1, 0), RSCRATCH);
|
|
||||||
skip_complex = J(true);
|
|
||||||
SetJumpTarget(too_complex);
|
|
||||||
}
|
}
|
||||||
// RSP alignment here is 8 due to the call.
|
else
|
||||||
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
{
|
||||||
ABI_CallFunctionR((void *)&WriteDual32, RSCRATCH_EXTRA);
|
MOVQ_xmm(R(RSCRATCH), XMM0);
|
||||||
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
|
ROL(64, R(RSCRATCH), Imm8(32));
|
||||||
if (!jit->js.memcheck)
|
BSWAP(64, RSCRATCH);
|
||||||
SetJumpTarget(skip_complex);
|
}
|
||||||
|
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 64, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||||
|
|
||||||
RET();
|
RET();
|
||||||
|
|
||||||
const u8* storePairedU8 = AlignCode4();
|
const u8* storePairedU8 = AlignCode4();
|
||||||
@ -316,12 +304,8 @@ void CommonAsmRoutines::GenQuantizedStores()
|
|||||||
MINPS(XMM0, M(m_65535));
|
MINPS(XMM0, M(m_65535));
|
||||||
|
|
||||||
CVTTPS2DQ(XMM0, R(XMM0));
|
CVTTPS2DQ(XMM0, R(XMM0));
|
||||||
MOVQ_xmm(M(psTemp), XMM0);
|
PSHUFLW(XMM0, R(XMM0), 2); // AABBCCDD -> CCAA____
|
||||||
// place ps[0] into the higher word, ps[1] into the lower
|
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||||
// so no ROL is needed after BSWAP
|
|
||||||
MOVZX(32, 16, RSCRATCH, M(&psTemp[0]));
|
|
||||||
SHL(32, R(RSCRATCH), Imm8(16));
|
|
||||||
MOV(16, R(RSCRATCH), M(&psTemp[1]));
|
|
||||||
BSWAP(32, RSCRATCH);
|
BSWAP(32, RSCRATCH);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,21 +353,6 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||||||
MOVD_xmm(R(RSCRATCH), XMM0);
|
MOVD_xmm(R(RSCRATCH), XMM0);
|
||||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||||
RET();
|
RET();
|
||||||
/*
|
|
||||||
if (cpu_info.bSSSE3)
|
|
||||||
{
|
|
||||||
PSHUFB(XMM0, M(pbswapShuffle2x4));
|
|
||||||
// TODO: SafeWriteFloat
|
|
||||||
MOVSS(M(&psTemp[0]), XMM0);
|
|
||||||
MOV(32, R(RSCRATCH), M(&psTemp[0]));
|
|
||||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOVSS(M(&psTemp[0]), XMM0);
|
|
||||||
MOV(32, R(RSCRATCH), M(&psTemp[0]));
|
|
||||||
SafeWriteRegToReg(RSCRATCH, RSCRATCH_EXTRA, 32, 0, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
|
||||||
}*/
|
|
||||||
|
|
||||||
const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
|
const u8* storeSingleU8 = AlignCode4(); // Used by MKWii
|
||||||
SHR(32, R(RSCRATCH2), Imm8(5));
|
SHR(32, R(RSCRATCH2), Imm8(5));
|
||||||
@ -441,6 +410,12 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
|||||||
const u8* loadPairedIllegal = AlignCode4();
|
const u8* loadPairedIllegal = AlignCode4();
|
||||||
UD2();
|
UD2();
|
||||||
|
|
||||||
|
// FIXME? This code (in non-MMU mode) assumes all accesses are directly to RAM, i.e.
|
||||||
|
// that they don't need hardware access handling. This will definitely crash if paired loads occur
|
||||||
|
// from non-RAM areas, but as far as I know, this never happens. I don't know if this is
|
||||||
|
// for a good reason, or merely because no game does this.
|
||||||
|
// If we find something that actually does do this, maybe this should be changed. How
|
||||||
|
// much of a performance hit would it be?
|
||||||
const u8* loadPairedFloatTwo = AlignCode4();
|
const u8* loadPairedFloatTwo = AlignCode4();
|
||||||
if (jit->js.memcheck)
|
if (jit->js.memcheck)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user